From scoder at codespeak.net Sun Dec 2 11:32:01 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Sun, 2 Dec 2007 11:32:01 +0100 (CET)
Subject: [Lxml-checkins] r49263 - lxml/trunk
Message-ID: <20071202103201.7407F813C@code0.codespeak.net>
Author: scoder
Date: Sun Dec 2 11:32:00 2007
New Revision: 49263
Modified:
lxml/trunk/TODO.txt
Log:
cleanup
Modified: lxml/trunk/TODO.txt
==============================================================================
--- lxml/trunk/TODO.txt (original)
+++ lxml/trunk/TODO.txt Sun Dec 2 11:32:00 2007
@@ -22,12 +22,6 @@
e.g. missing namespace mappings in XPath
-ElementTree
------------
-
-* _setroot(), even though this is not strictly a public method.
-
-
QName
-----
From scoder at codespeak.net Sun Dec 2 11:43:22 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Sun, 2 Dec 2007 11:43:22 +0100 (CET)
Subject: [Lxml-checkins] r49264 - lxml/trunk/doc
Message-ID: <20071202104322.86A5F8142@code0.codespeak.net>
Author: scoder
Date: Sun Dec 2 11:43:22 2007
New Revision: 49264
Modified:
lxml/trunk/doc/performance.txt
Log:
doc cleanup
Modified: lxml/trunk/doc/performance.txt
==============================================================================
--- lxml/trunk/doc/performance.txt (original)
+++ lxml/trunk/doc/performance.txt Sun Dec 2 11:43:22 2007
@@ -598,7 +598,7 @@
the Python objects, thus trading memory for speed. Just create a cache
dictionary and run::
- cache[root] = list(root.getiterator())
+ cache[root] = list(root.iter())
after parsing and::
From scoder at codespeak.net Sun Dec 2 13:30:07 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Sun, 2 Dec 2007 13:30:07 +0100 (CET)
Subject: [Lxml-checkins] r49273 - lxml/trunk/src/lxml
Message-ID: <20071202123007.7B6A380C9@code0.codespeak.net>
Author: scoder
Date: Sun Dec 2 13:30:07 2007
New Revision: 49273
Modified:
lxml/trunk/src/lxml/cstd.pxd
lxml/trunk/src/lxml/lxml.etree.pyx
lxml/trunk/src/lxml/parser.pxi
lxml/trunk/src/lxml/python.pxd
lxml/trunk/src/lxml/xmlerror.pxi
lxml/trunk/src/lxml/xpath.pxi
lxml/trunk/src/lxml/xslt.pxi
Log:
use 'with gil/nogil' where appropriate instead of acquiring/releasing the GIL by hand
Modified: lxml/trunk/src/lxml/cstd.pxd
==============================================================================
--- lxml/trunk/src/lxml/cstd.pxd (original)
+++ lxml/trunk/src/lxml/cstd.pxd Sun Dec 2 13:30:07 2007
@@ -15,6 +15,10 @@
cdef void* memcpy(void* dest, void* src, size_t len)
cdef void* memset(void* s, int c, size_t len)
+cdef extern from "stdlib.h":
+ cdef void* malloc(size_t size)
+ cdef void free(void* ptr)
+
cdef extern from "stdarg.h":
ctypedef void *va_list
void va_start(va_list ap, void *last)
Modified: lxml/trunk/src/lxml/lxml.etree.pyx
==============================================================================
--- lxml/trunk/src/lxml/lxml.etree.pyx (original)
+++ lxml/trunk/src/lxml/lxml.etree.pyx Sun Dec 2 13:30:07 2007
@@ -1181,7 +1181,6 @@
cdef _Element NEW_ELEMENT "PY_NEW" (object t)
cdef _Element _elementFactory(_Document doc, xmlNode* c_node):
- cdef python.PyThreadState* state
cdef _Element result
result = getProxy(c_node)
if result is not None:
@@ -1190,9 +1189,9 @@
return None
if config.ENABLE_THREADING:
- state = python.PyEval_SaveThread()
- python.PyThread_acquire_lock(ELEMENT_CREATION_LOCK, python.WAIT_LOCK)
- python.PyEval_RestoreThread(state)
+ with nogil:
+ python.PyThread_acquire_lock(
+ ELEMENT_CREATION_LOCK, python.WAIT_LOCK)
result = getProxy(c_node)
if result is not None:
python.PyThread_release_lock(ELEMENT_CREATION_LOCK)
Modified: lxml/trunk/src/lxml/parser.pxi
==============================================================================
--- lxml/trunk/src/lxml/parser.pxi (original)
+++ lxml/trunk/src/lxml/parser.pxi Sun Dec 2 13:30:07 2007
@@ -409,13 +409,11 @@
xmlparser.xmlClearParserCtxt(self._c_ctxt)
cdef int prepare(self) except -1:
- cdef python.PyThreadState* state
cdef int result
if config.ENABLE_THREADING and self._lock is not NULL:
- state = python.PyEval_SaveThread()
- result = python.PyThread_acquire_lock(
- self._lock, python.WAIT_LOCK)
- python.PyEval_RestoreThread(state)
+ with nogil:
+ result = python.PyThread_acquire_lock(
+ self._lock, python.WAIT_LOCK)
if result == 0:
raise ParserError, "parser locking failed"
self._error_log.connect()
Modified: lxml/trunk/src/lxml/python.pxd
==============================================================================
--- lxml/trunk/src/lxml/python.pxd (original)
+++ lxml/trunk/src/lxml/python.pxd Sun Dec 2 13:30:07 2007
@@ -95,12 +95,6 @@
cdef object PyErr_NoMemory()
cdef object PyErr_SetFromErrno(object type)
- ctypedef enum PyGILState_STATE:
- PyGILState_LOCKED
- PyGILState_UNLOCKED
-
- cdef PyGILState_STATE PyGILState_Ensure()
- cdef void PyGILState_Release(PyGILState_STATE state)
cdef PyThreadState* PyEval_SaveThread()
cdef void PyEval_RestoreThread(PyThreadState* state)
cdef PyObject* PyThreadState_GetDict()
@@ -109,7 +103,7 @@
ctypedef void* PyThread_type_lock
cdef PyThread_type_lock PyThread_allocate_lock()
cdef void PyThread_free_lock(PyThread_type_lock lock)
- cdef int PyThread_acquire_lock(PyThread_type_lock lock, int mode)
+ cdef int PyThread_acquire_lock(PyThread_type_lock lock, int mode) nogil
cdef void PyThread_release_lock(PyThread_type_lock lock)
cdef long PyThread_get_thread_ident()
Modified: lxml/trunk/src/lxml/xmlerror.pxi
==============================================================================
--- lxml/trunk/src/lxml/xmlerror.pxi (original)
+++ lxml/trunk/src/lxml/xmlerror.pxi Sun Dec 2 13:30:07 2007
@@ -370,7 +370,7 @@
# local log functions: forward error to logger object
-cdef void _forwardError(void* c_log_handler, xmlerror.xmlError* error):
+cdef void _forwardError(void* c_log_handler, xmlerror.xmlError* error) with gil:
cdef _BaseErrorLog log_handler
if c_log_handler is not NULL:
log_handler = <_BaseErrorLog>c_log_handler
@@ -378,19 +378,15 @@
log_handler = __GLOBAL_ERROR_LOG
log_handler._receive(error)
-cdef void _receiveError(void* c_log_handler, xmlerror.xmlError* error):
+cdef void _receiveError(void* c_log_handler, xmlerror.xmlError* error) nogil:
# no Python objects here, may be called without thread context !
# when we declare a Python object, Pyrex will INCREF(None) !
- cdef python.PyGILState_STATE gil_state
if __DEBUG != 0:
- gil_state = python.PyGILState_Ensure()
_forwardError(c_log_handler, error)
- python.PyGILState_Release(gil_state)
-cdef void _receiveXSLTError(void* c_log_handler, char* msg, ...):
+cdef void _receiveXSLTError(void* c_log_handler, char* msg, ...) nogil:
# no Python objects here, may be called without thread context !
# when we declare a Python object, Pyrex will INCREF(None) !
- cdef python.PyGILState_STATE gil_state
cdef xmlerror.xmlError c_error
cdef cstd.va_list args
cdef char* c_text
@@ -422,7 +418,6 @@
c_element = NULL
cstd.va_end(args)
- gil_state = python.PyGILState_Ensure()
c_message = NULL
if c_text is NULL:
c_error.message = ''
@@ -431,7 +426,7 @@
else:
text_size = cstd.strlen(c_text)
element_size = cstd.strlen(c_element)
- c_message = python.PyMem_Malloc(
+ c_message = cstd.malloc(
(text_size + 12 + element_size + 1) * sizeof(char))
cstd.sprintf(c_message, "%s, element '%s'", c_text, c_element)
c_error.message = c_message
@@ -444,8 +439,7 @@
_forwardError(c_log_handler, &c_error)
if c_message is not NULL:
- python.PyMem_Free(c_error.message)
- python.PyGILState_Release(gil_state)
+ cstd.free(c_error.message)
################################################################################
Modified: lxml/trunk/src/lxml/xpath.pxi
==============================================================================
--- lxml/trunk/src/lxml/xpath.pxi (original)
+++ lxml/trunk/src/lxml/xpath.pxi Sun Dec 2 13:30:07 2007
@@ -148,13 +148,11 @@
return c == c'/'
cdef int _lock(self) except -1:
- cdef python.PyThreadState* state
cdef int result
if config.ENABLE_THREADING and self._eval_lock != NULL:
- state = python.PyEval_SaveThread()
- result = python.PyThread_acquire_lock(
- self._eval_lock, python.WAIT_LOCK)
- python.PyEval_RestoreThread(state)
+ with nogil:
+ result = python.PyThread_acquire_lock(
+ self._eval_lock, python.WAIT_LOCK)
if result == 0:
raise ParserError, "parser locking failed"
return 0
Modified: lxml/trunk/src/lxml/xslt.pxi
==============================================================================
--- lxml/trunk/src/lxml/xslt.pxi (original)
+++ lxml/trunk/src/lxml/xslt.pxi Sun Dec 2 13:30:07 2007
@@ -64,16 +64,8 @@
context._parser = parser
context._c_style_doc = NULL
-cdef xmlDoc* _xslt_resolve_stylesheet(char* c_uri, void* context):
- cdef xmlDoc* c_doc
- c_doc = (<_XSLTResolverContext>context)._c_style_doc
- if c_doc is not NULL and c_doc.URL is not NULL:
- if cstd.strcmp(c_uri, c_doc.URL) == 0:
- return _copyDoc(c_doc, 1)
- return NULL
-
cdef xmlDoc* _xslt_resolve_from_python(char* c_uri, void* c_context,
- int parse_options, int* error):
+ int parse_options, int* error) with gil:
# call the Python document loaders
cdef _XSLTResolverContext context
cdef _ResolverRegistry resolvers
@@ -82,6 +74,14 @@
error[0] = 0
context = <_XSLTResolverContext>c_context
+
+ # shortcut if we resolve the stylesheet itself
+ c_doc = context._c_style_doc
+ if c_doc is not NULL and c_doc.URL is not NULL:
+ if cstd.strcmp(c_uri, c_doc.URL) == 0:
+ return _copyDoc(c_doc, 1)
+
+ # delegate to the Python resolvers
try:
resolvers = context._resolvers
if cstd.strncmp('string://', c_uri, 9) == 0:
@@ -115,7 +115,7 @@
return NULL
cdef void _xslt_store_resolver_exception(char* c_uri, void* context,
- xslt.xsltLoadType c_type):
+ xslt.xsltLoadType c_type) with gil:
message = "Cannot resolve URI %s" % c_uri
if c_type == xslt.XSLT_LOAD_DOCUMENT:
exception = XSLTApplyError(message)
@@ -125,14 +125,13 @@
cdef xmlDoc* _xslt_doc_loader(char* c_uri, tree.xmlDict* c_dict,
int parse_options, void* c_ctxt,
- xslt.xsltLoadType c_type):
+ xslt.xsltLoadType c_type) nogil:
# no Python objects here, may be called without thread context !
# when we declare a Python object, Pyrex will INCREF(None) !
cdef xmlDoc* c_doc
cdef xmlDoc* result
cdef void* c_pcontext
cdef int error
- cdef python.PyGILState_STATE gil_state
# find resolver contexts of stylesheet and transformed doc
if c_type == xslt.XSLT_LOAD_DOCUMENT:
# transformation time
@@ -148,14 +147,6 @@
return XSLT_DOC_DEFAULT_LOADER(
c_uri, c_dict, parse_options, c_ctxt, c_type)
- gil_state = python.PyGILState_Ensure()
- c_doc = _xslt_resolve_stylesheet(c_uri, c_pcontext)
- if c_doc is not NULL:
- python.PyGILState_Release(gil_state)
- if c_type == xslt.XSLT_LOAD_STYLESHEET:
- c_doc._private = c_pcontext
- return c_doc
-
c_doc = _xslt_resolve_from_python(c_uri, c_pcontext, parse_options, &error)
if c_doc is NULL and not error:
c_doc = XSLT_DOC_DEFAULT_LOADER(
@@ -163,7 +154,6 @@
if c_doc is NULL:
_xslt_store_resolver_exception(c_uri, c_pcontext, c_type)
- python.PyGILState_Release(gil_state)
if c_doc is not NULL and c_type == xslt.XSLT_LOAD_STYLESHEET:
c_doc._private = c_pcontext
return c_doc
From scoder at codespeak.net Sun Dec 2 14:45:02 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Sun, 2 Dec 2007 14:45:02 +0100 (CET)
Subject: [Lxml-checkins] r49277 - lxml/trunk/src/lxml
Message-ID: <20071202134502.A4F278169@code0.codespeak.net>
Author: scoder
Date: Sun Dec 2 14:45:01 2007
New Revision: 49277
Modified:
lxml/trunk/src/lxml/c14n.pxd
lxml/trunk/src/lxml/cstd.pxd
lxml/trunk/src/lxml/dtd.pxi
lxml/trunk/src/lxml/dtdvalid.pxd
lxml/trunk/src/lxml/etreepublic.pxd
lxml/trunk/src/lxml/htmlparser.pxd
lxml/trunk/src/lxml/lxml.etree.pyx
lxml/trunk/src/lxml/parser.pxi
lxml/trunk/src/lxml/relaxng.pxd
lxml/trunk/src/lxml/relaxng.pxi
lxml/trunk/src/lxml/schematron.pxd
lxml/trunk/src/lxml/schematron.pxi
lxml/trunk/src/lxml/serializer.pxi
lxml/trunk/src/lxml/tree.pxd
lxml/trunk/src/lxml/xinclude.pxd
lxml/trunk/src/lxml/xmlerror.pxd
lxml/trunk/src/lxml/xmlparser.pxd
lxml/trunk/src/lxml/xmlschema.pxd
lxml/trunk/src/lxml/xmlschema.pxi
lxml/trunk/src/lxml/xpath.pxd
lxml/trunk/src/lxml/xpath.pxi
lxml/trunk/src/lxml/xslt.pxd
lxml/trunk/src/lxml/xslt.pxi
Log:
use 'with gil/nogil' where appropriate instead of acquiring/releasing the GIL by hand
Modified: lxml/trunk/src/lxml/c14n.pxd
==============================================================================
--- lxml/trunk/src/lxml/c14n.pxd (original)
+++ lxml/trunk/src/lxml/c14n.pxd Sun Dec 2 14:45:01 2007
@@ -7,7 +7,7 @@
int exclusive,
char** inclusive_ns_prefixes,
int with_comments,
- char** doc_txt_ptr)
+ char** doc_txt_ptr) nogil
cdef int xmlC14NDocSave(xmlDoc* doc,
xmlNodeSet* nodes,
@@ -15,12 +15,12 @@
char** inclusive_ns_prefixes,
int with_comments,
char* filename,
- int compression)
+ int compression) nogil
cdef int xmlC14NDocSaveTo(xmlDoc* doc,
xmlNodeSet* nodes,
int exclusive,
char** inclusive_ns_prefixes,
int with_comments,
- xmlOutputBuffer* buffer)
-
+ xmlOutputBuffer* buffer) nogil
+
Modified: lxml/trunk/src/lxml/cstd.pxd
==============================================================================
--- lxml/trunk/src/lxml/cstd.pxd (original)
+++ lxml/trunk/src/lxml/cstd.pxd Sun Dec 2 14:45:01 2007
@@ -1,29 +1,29 @@
cdef extern from "stdio.h":
ctypedef struct FILE
- cdef int sprintf(char* str, char* format, ...)
- cdef int printf(char* str)
+ cdef int sprintf(char* str, char* format, ...) nogil
+ cdef int printf(char* str) nogil
cdef extern from "string.h":
ctypedef int size_t
- cdef int strlen(char* s)
- cdef char* strstr(char* haystack, char* needle)
- cdef char* strchr(char* haystack, int needle)
- cdef char* strrchr(char* haystack, int needle)
- cdef int strcmp(char* s1, char* s2)
- cdef int strncmp(char* s1, char* s2, size_t len)
- cdef void* memcpy(void* dest, void* src, size_t len)
- cdef void* memset(void* s, int c, size_t len)
+ cdef int strlen(char* s) nogil
+ cdef char* strstr(char* haystack, char* needle) nogil
+ cdef char* strchr(char* haystack, int needle) nogil
+ cdef char* strrchr(char* haystack, int needle) nogil
+ cdef int strcmp(char* s1, char* s2) nogil
+ cdef int strncmp(char* s1, char* s2, size_t len) nogil
+ cdef void* memcpy(void* dest, void* src, size_t len) nogil
+ cdef void* memset(void* s, int c, size_t len) nogil
cdef extern from "stdlib.h":
- cdef void* malloc(size_t size)
- cdef void free(void* ptr)
+ cdef void* malloc(size_t size) nogil
+ cdef void free(void* ptr) nogil
cdef extern from "stdarg.h":
ctypedef void *va_list
- void va_start(va_list ap, void *last)
- void va_end(va_list ap)
+ void va_start(va_list ap, void *last) nogil
+ void va_end(va_list ap) nogil
cdef extern from "etree_defs.h":
- cdef int va_int(va_list ap)
- cdef char *va_charptr(va_list ap)
+ cdef int va_int(va_list ap) nogil
+ cdef char *va_charptr(va_list ap) nogil
Modified: lxml/trunk/src/lxml/dtd.pxi
==============================================================================
--- lxml/trunk/src/lxml/dtd.pxi (original)
+++ lxml/trunk/src/lxml/dtd.pxi Sun Dec 2 14:45:01 2007
@@ -53,7 +53,6 @@
Returns true if the document is valid, false if not.
"""
- cdef python.PyThreadState* state
cdef _Document doc
cdef _Element root_node
cdef xmlDoc* c_doc
@@ -70,9 +69,8 @@
raise DTDError, "Failed to create validation context"
c_doc = _fakeRootDoc(doc._c_doc, root_node._c_node)
- state = python.PyEval_SaveThread()
- ret = dtdvalid.xmlValidateDtd(valid_ctxt, c_doc, self._c_dtd)
- python.PyEval_RestoreThread(state)
+ with nogil:
+ ret = dtdvalid.xmlValidateDtd(valid_ctxt, c_doc, self._c_dtd)
_destroyFakeDoc(doc._c_doc, c_doc)
dtdvalid.xmlFreeValidCtxt(valid_ctxt)
Modified: lxml/trunk/src/lxml/dtdvalid.pxd
==============================================================================
--- lxml/trunk/src/lxml/dtdvalid.pxd (original)
+++ lxml/trunk/src/lxml/dtdvalid.pxd Sun Dec 2 14:45:01 2007
@@ -4,7 +4,7 @@
cdef extern from "libxml/valid.h":
ctypedef struct xmlValidCtxt
- cdef xmlValidCtxt* xmlNewValidCtxt()
- cdef void xmlFreeValidCtxt(xmlValidCtxt* cur)
+ cdef xmlValidCtxt* xmlNewValidCtxt() nogil
+ cdef void xmlFreeValidCtxt(xmlValidCtxt* cur) nogil
- cdef int xmlValidateDtd(xmlValidCtxt* ctxt, xmlDoc* doc, xmlDtd* dtd)
+ cdef int xmlValidateDtd(xmlValidCtxt* ctxt, xmlDoc* doc, xmlDtd* dtd) nogil
Modified: lxml/trunk/src/lxml/etreepublic.pxd
==============================================================================
--- lxml/trunk/src/lxml/etreepublic.pxd (original)
+++ lxml/trunk/src/lxml/etreepublic.pxd Sun Dec 2 14:45:01 2007
@@ -5,16 +5,16 @@
cdef extern from "etree_defs.h":
# test if c_node is considered an Element (i.e. Element, Comment, etc.)
- cdef bint _isElement(tree.xmlNode* c_node)
+ cdef bint _isElement(tree.xmlNode* c_node) nogil
# return the namespace URI of the node or NULL
- cdef char* _getNs(tree.xmlNode* node)
+ cdef char* _getNs(tree.xmlNode* node) nogil
# pair of macros for tree traversal
cdef void BEGIN_FOR_EACH_ELEMENT_FROM(tree.xmlNode* tree_top,
tree.xmlNode* start_node,
- int start_node_inclusive)
- cdef void END_FOR_EACH_ELEMENT_FROM(tree.xmlNode* start_node)
+ int start_node_inclusive) nogil
+ cdef void END_FOR_EACH_ELEMENT_FROM(tree.xmlNode* start_node) nogil
cdef extern from "lxml.etree_api.h":
@@ -129,23 +129,23 @@
# XML node helper functions
# check if the element has at least one child
- cdef bint hasChild(tree.xmlNode* c_node)
+ cdef bint hasChild(tree.xmlNode* c_node) nogil
# find child element number 'index' (supports negative indexes)
cdef tree.xmlNode* findChild(tree.xmlNode* c_node,
- Py_ssize_t index)
+ Py_ssize_t index) nogil
# find child element number 'index' starting at first one
cdef tree.xmlNode* findChildForwards(tree.xmlNode* c_node,
- Py_ssize_t index)
+ Py_ssize_t index) nogil
# find child element number 'index' starting at last one
cdef tree.xmlNode* findChildBackwards(tree.xmlNode* c_node,
- Py_ssize_t index)
+ Py_ssize_t index) nogil
# return next/previous sibling element of the node
- cdef tree.xmlNode* nextElement(tree.xmlNode* c_node)
- cdef tree.xmlNode* previousElement(tree.xmlNode* c_node)
+ cdef tree.xmlNode* nextElement(tree.xmlNode* c_node) nogil
+ cdef tree.xmlNode* previousElement(tree.xmlNode* c_node) nogil
##########################################################################
# iterators
@@ -191,10 +191,10 @@
cdef object namespacedNameFromNsName(char* c_ns, char* c_tag)
# check if the node has a text value (which may be '')
- cdef bint hasText(tree.xmlNode* c_node)
+ cdef bint hasText(tree.xmlNode* c_node) nogil
# check if the node has a tail value (which may be '')
- cdef bint hasTail(tree.xmlNode* c_node)
+ cdef bint hasTail(tree.xmlNode* c_node) nogil
# get the text content of an element (or None)
cdef object textOf(tree.xmlNode* c_node)
Modified: lxml/trunk/src/lxml/htmlparser.pxd
==============================================================================
--- lxml/trunk/src/lxml/htmlparser.pxd (original)
+++ lxml/trunk/src/lxml/htmlparser.pxd Sun Dec 2 14:45:01 2007
@@ -14,30 +14,34 @@
HTML_PARSE_RECOVER # Relaxed parsing
HTML_PARSE_COMPACT # compact small text nodes
- cdef xmlParserCtxt* htmlCreateMemoryParserCtxt(char* buffer, int size)
- cdef xmlParserCtxt* htmlCreateFileParserCtxt(char* filename, char* encoding)
+ cdef xmlParserCtxt* htmlCreateMemoryParserCtxt(
+ char* buffer, int size) nogil
+ cdef xmlParserCtxt* htmlCreateFileParserCtxt(
+ char* filename, char* encoding) nogil
cdef xmlParserCtxt* htmlCreatePushParserCtxt(xmlSAXHandler* sax,
void* user_data,
char* chunk, int size,
- char* filename, int enc)
- cdef void htmlFreeParserCtxt(xmlParserCtxt* ctxt)
- cdef void htmlCtxtReset(xmlParserCtxt* ctxt)
- cdef int htmlCtxtUseOptions(xmlParserCtxt* ctxt, int options)
- cdef int htmlParseDocument(xmlParserCtxt* ctxt)
+ char* filename, int enc) nogil
+ cdef void htmlFreeParserCtxt(xmlParserCtxt* ctxt) nogil
+ cdef void htmlCtxtReset(xmlParserCtxt* ctxt) nogil
+ cdef int htmlCtxtUseOptions(xmlParserCtxt* ctxt, int options) nogil
+ cdef int htmlParseDocument(xmlParserCtxt* ctxt) nogil
cdef int htmlParseChunk(xmlParserCtxt* ctxt,
- char* chunk, int size, int terminate)
+ char* chunk, int size, int terminate) nogil
cdef xmlDoc* htmlCtxtReadFile(xmlParserCtxt* ctxt,
char* filename, char* encoding,
- int options)
+ int options) nogil
cdef xmlDoc* htmlCtxtReadDoc(xmlParserCtxt* ctxt,
char* buffer, char* URL, char* encoding,
- int options)
+ int options) nogil
cdef xmlDoc* htmlCtxtReadIO(xmlParserCtxt* ctxt,
xmlInputReadCallback ioread,
xmlInputCloseCallback ioclose,
void* ioctx,
- char* URL, char* encoding, int options)
+ char* URL, char* encoding,
+ int options) nogil
cdef xmlDoc* htmlCtxtReadMemory(xmlParserCtxt* ctxt,
char* buffer, int size,
- char* filename, char* encoding, int options)
+ char* filename, char* encoding,
+ int options) nogil
Modified: lxml/trunk/src/lxml/lxml.etree.pyx
==============================================================================
--- lxml/trunk/src/lxml/lxml.etree.pyx (original)
+++ lxml/trunk/src/lxml/lxml.etree.pyx Sun Dec 2 14:45:01 2007
@@ -1588,8 +1588,8 @@
Note that XInclude does not support custom resolvers in Python space
due to restrictions of libxml2 <= 2.6.29.
"""
- cdef python.PyThreadState* state
cdef int result
+ self._assertHasRoot()
# We cannot pass the XML_PARSE_NOXINCNODE option as this would free
# the XInclude nodes - there may still be Python references to them!
# Therefore, we allow XInclude nodes to be converted to
@@ -1597,16 +1597,14 @@
# siblings. Tree traversal will simply ignore them as they are not
# typed as elements. The included fragment is added between the two,
# i.e. as a sibling, which does not conflict with traversal.
- self._assertHasRoot()
- state = python.PyEval_SaveThread()
- if self._context_node._doc._parser is not None:
- result = xinclude.xmlXIncludeProcessTreeFlags(
- self._context_node._c_node,
- self._context_node._doc._parser._parse_options)
- else:
- result = xinclude.xmlXIncludeProcessTree(
- self._context_node._c_node)
- python.PyEval_RestoreThread(state)
+ with nogil:
+ if self._context_node._doc._parser is not None:
+ result = xinclude.xmlXIncludeProcessTreeFlags(
+ self._context_node._c_node,
+ self._context_node._doc._parser._parse_options)
+ else:
+ result = xinclude.xmlXIncludeProcessTree(
+ self._context_node._c_node)
if result == -1:
raise XIncludeError, "XInclude processing failed"
Modified: lxml/trunk/src/lxml/parser.pxi
==============================================================================
--- lxml/trunk/src/lxml/parser.pxi (original)
+++ lxml/trunk/src/lxml/parser.pxi Sun Dec 2 14:45:01 2007
@@ -228,7 +228,6 @@
return xmlparser.xmlNewIOInputStream(ctxt, c_buffer, 0)
cdef xmlDoc* _readDoc(self, xmlparser.xmlParserCtxt* ctxt, int options):
- cdef python.PyThreadState* state
cdef xmlDoc* result
cdef char* c_encoding
@@ -237,29 +236,24 @@
else:
c_encoding = _cstr(self._encoding)
- state = python.PyEval_SaveThread()
- if ctxt.html:
- result = htmlparser.htmlCtxtReadIO(
- ctxt, _readFilelikeParser, NULL, self,
- self._c_url, c_encoding, options)
- else:
- result = xmlparser.xmlCtxtReadIO(
- ctxt, _readFilelikeParser, NULL, self,
- self._c_url, c_encoding, options)
- python.PyEval_RestoreThread(state)
+ with nogil:
+ if ctxt.html:
+ result = htmlparser.htmlCtxtReadIO(
+ ctxt, _readFilelikeParser, NULL, self,
+ self._c_url, c_encoding, options)
+ else:
+ result = xmlparser.xmlCtxtReadIO(
+ ctxt, _readFilelikeParser, NULL, self,
+ self._c_url, c_encoding, options)
return result
cdef tree.xmlDtd* _readDtd(self):
- cdef python.PyThreadState* state
- cdef tree.xmlDtd* result
cdef xmlparser.xmlParserInputBuffer* c_buffer
c_buffer = xmlparser.xmlAllocParserInputBuffer(0)
c_buffer.context = self
c_buffer.readcallback = _readFilelikeParser
- state = python.PyEval_SaveThread()
- result = xmlparser.xmlIOParseDTD(NULL, c_buffer, 0)
- python.PyEval_RestoreThread(state)
- return result
+ with nogil:
+ return xmlparser.xmlIOParseDTD(NULL, c_buffer, 0)
cdef int copyToBuffer(self, char* c_buffer, int c_size):
cdef char* c_start
@@ -699,7 +693,6 @@
"""Parse unicode document, share dictionary if possible.
"""
cdef _ParserContext context
- cdef python.PyThreadState* state
cdef xmlDoc* result
cdef xmlparser.xmlParserCtxt* pctxt
cdef Py_ssize_t py_buffer_len
@@ -719,16 +712,15 @@
__GLOBAL_PARSER_CONTEXT.initParserDict(pctxt)
c_text = python.PyUnicode_AS_DATA(utext)
- state = python.PyEval_SaveThread()
- if self._for_html:
- result = htmlparser.htmlCtxtReadMemory(
- pctxt, c_text, buffer_len, c_filename, _UNICODE_ENCODING,
- self._parse_options)
- else:
- result = xmlparser.xmlCtxtReadMemory(
- pctxt, c_text, buffer_len, c_filename, _UNICODE_ENCODING,
- self._parse_options)
- python.PyEval_RestoreThread(state)
+ with nogil:
+ if self._for_html:
+ result = htmlparser.htmlCtxtReadMemory(
+ pctxt, c_text, buffer_len, c_filename, _UNICODE_ENCODING,
+ self._parse_options)
+ else:
+ result = xmlparser.xmlCtxtReadMemory(
+ pctxt, c_text, buffer_len, c_filename, _UNICODE_ENCODING,
+ self._parse_options)
return context._handleParseResultDoc(self, result, None)
finally:
@@ -739,7 +731,6 @@
"""Parse document, share dictionary if possible.
"""
cdef _ParserContext context
- cdef python.PyThreadState* state
cdef xmlDoc* result
cdef xmlparser.xmlParserCtxt* pctxt
cdef char* c_encoding
@@ -757,16 +748,15 @@
else:
c_encoding = _cstr(self._default_encoding)
- state = python.PyEval_SaveThread()
- if self._for_html:
- result = htmlparser.htmlCtxtReadMemory(
- pctxt, c_text, c_len, c_filename,
- c_encoding, self._parse_options)
- else:
- result = xmlparser.xmlCtxtReadMemory(
- pctxt, c_text, c_len, c_filename,
- c_encoding, self._parse_options)
- python.PyEval_RestoreThread(state)
+ with nogil:
+ if self._for_html:
+ result = htmlparser.htmlCtxtReadMemory(
+ pctxt, c_text, c_len, c_filename,
+ c_encoding, self._parse_options)
+ else:
+ result = xmlparser.xmlCtxtReadMemory(
+ pctxt, c_text, c_len, c_filename,
+ c_encoding, self._parse_options)
return context._handleParseResultDoc(self, result, None)
finally:
@@ -774,7 +764,6 @@
cdef xmlDoc* _parseDocFromFile(self, char* c_filename) except NULL:
cdef _ParserContext context
- cdef python.PyThreadState* state
cdef xmlDoc* result
cdef xmlparser.xmlParserCtxt* pctxt
cdef int orig_options
@@ -793,14 +782,13 @@
c_encoding = _cstr(self._default_encoding)
orig_options = pctxt.options
- state = python.PyEval_SaveThread()
- if self._for_html:
- result = htmlparser.htmlCtxtReadFile(
- pctxt, c_filename, c_encoding, self._parse_options)
- else:
- result = xmlparser.xmlCtxtReadFile(
- pctxt, c_filename, c_encoding, self._parse_options)
- python.PyEval_RestoreThread(state)
+ with nogil:
+ if self._for_html:
+ result = htmlparser.htmlCtxtReadFile(
+ pctxt, c_filename, c_encoding, self._parse_options)
+ else:
+ result = xmlparser.xmlCtxtReadFile(
+ pctxt, c_filename, c_encoding, self._parse_options)
pctxt.options = orig_options # work around libxml2 problem
return context._handleParseResultDoc(self, result, c_filename)
@@ -1630,13 +1618,12 @@
return result
cdef xmlDoc* _copyDoc(xmlDoc* c_doc, int recursive) except NULL:
- cdef python.PyThreadState* state
cdef xmlDoc* result
if recursive:
- state = python.PyEval_SaveThread()
- result = tree.xmlCopyDoc(c_doc, recursive)
- if recursive:
- python.PyEval_RestoreThread(state)
+ with nogil:
+ result = tree.xmlCopyDoc(c_doc, recursive)
+ else:
+ result = tree.xmlCopyDoc(c_doc, 0)
if result is NULL:
python.PyErr_NoMemory()
__GLOBAL_PARSER_CONTEXT.initDocDict(result)
@@ -1644,14 +1631,12 @@
cdef xmlDoc* _copyDocRoot(xmlDoc* c_doc, xmlNode* c_new_root) except NULL:
"Recursively copy the document and make c_new_root the new root node."
- cdef python.PyThreadState* state
cdef xmlDoc* result
cdef xmlNode* c_node
result = tree.xmlCopyDoc(c_doc, 0) # non recursive
__GLOBAL_PARSER_CONTEXT.initDocDict(result)
- state = python.PyEval_SaveThread()
- c_node = tree.xmlDocCopyNode(c_new_root, result, 1) # recursive
- python.PyEval_RestoreThread(state)
+ with nogil:
+ c_node = tree.xmlDocCopyNode(c_new_root, result, 1) # recursive
if c_node is NULL:
python.PyErr_NoMemory()
tree.xmlDocSetRootElement(result, c_node)
Modified: lxml/trunk/src/lxml/relaxng.pxd
==============================================================================
--- lxml/trunk/src/lxml/relaxng.pxd (original)
+++ lxml/trunk/src/lxml/relaxng.pxd Sun Dec 2 14:45:01 2007
@@ -49,12 +49,11 @@
XML_RELAXNG_ERR_ELEMWRONG = 38
XML_RELAXNG_ERR_TEXTWRONG = 39
- cdef xmlRelaxNGValidCtxt* xmlRelaxNGNewValidCtxt(xmlRelaxNG* schema)
- cdef int xmlRelaxNGValidateDoc(xmlRelaxNGValidCtxt* ctxt, xmlDoc* doc)
- cdef xmlRelaxNG* xmlRelaxNGParse(xmlRelaxNGParserCtxt* ctxt)
- cdef xmlRelaxNGParserCtxt* xmlRelaxNGNewParserCtxt(char* URL)
- cdef xmlRelaxNGParserCtxt* xmlRelaxNGNewDocParserCtxt(xmlDoc* doc)
- cdef void xmlRelaxNGFree(xmlRelaxNG* schema)
- cdef void xmlRelaxNGFreeParserCtxt(xmlRelaxNGParserCtxt* ctxt)
- cdef void xmlRelaxNGFreeValidCtxt(xmlRelaxNGValidCtxt* ctxt)
-
+ cdef xmlRelaxNGValidCtxt* xmlRelaxNGNewValidCtxt(xmlRelaxNG* schema) nogil
+ cdef int xmlRelaxNGValidateDoc(xmlRelaxNGValidCtxt* ctxt, xmlDoc* doc) nogil
+ cdef xmlRelaxNG* xmlRelaxNGParse(xmlRelaxNGParserCtxt* ctxt) nogil
+ cdef xmlRelaxNGParserCtxt* xmlRelaxNGNewParserCtxt(char* URL) nogil
+ cdef xmlRelaxNGParserCtxt* xmlRelaxNGNewDocParserCtxt(xmlDoc* doc) nogil
+ cdef void xmlRelaxNGFree(xmlRelaxNG* schema) nogil
+ cdef void xmlRelaxNGFreeParserCtxt(xmlRelaxNGParserCtxt* ctxt) nogil
+ cdef void xmlRelaxNGFreeValidCtxt(xmlRelaxNGValidCtxt* ctxt) nogil
Modified: lxml/trunk/src/lxml/relaxng.pxi
==============================================================================
--- lxml/trunk/src/lxml/relaxng.pxi (original)
+++ lxml/trunk/src/lxml/relaxng.pxi Sun Dec 2 14:45:01 2007
@@ -86,7 +86,6 @@
"""Validate doc using Relax NG.
Returns true if document is valid, false if not."""
- cdef python.PyThreadState* state
cdef _Document doc
cdef _Element root_node
cdef xmlDoc* c_doc
@@ -103,9 +102,8 @@
python.PyErr_NoMemory()
c_doc = _fakeRootDoc(doc._c_doc, root_node._c_node)
- state = python.PyEval_SaveThread()
- ret = relaxng.xmlRelaxNGValidateDoc(valid_ctxt, c_doc)
- python.PyEval_RestoreThread(state)
+ with nogil:
+ ret = relaxng.xmlRelaxNGValidateDoc(valid_ctxt, c_doc)
_destroyFakeDoc(doc._c_doc, c_doc)
relaxng.xmlRelaxNGFreeValidCtxt(valid_ctxt)
Modified: lxml/trunk/src/lxml/schematron.pxd
==============================================================================
--- lxml/trunk/src/lxml/schematron.pxd (original)
+++ lxml/trunk/src/lxml/schematron.pxd Sun Dec 2 14:45:01 2007
@@ -14,15 +14,17 @@
XML_SCHEMATRON_OUT_BUFFER = 512 # output to a buffer
XML_SCHEMATRON_OUT_IO = 1024 # output to I/O mechanism
- cdef xmlSchematronParserCtxt* xmlSchematronNewDocParserCtxt(xmlDoc* doc)
- cdef xmlSchematronParserCtxt* xmlSchematronNewParserCtxt(char* filename)
- cdef xmlSchematronValidCtxt* xmlSchematronNewValidCtxt(xmlSchematron* schema,
- int options)
+ cdef xmlSchematronParserCtxt* xmlSchematronNewDocParserCtxt(
+ xmlDoc* doc) nogil
+ cdef xmlSchematronParserCtxt* xmlSchematronNewParserCtxt(
+ char* filename) nogil
+ cdef xmlSchematronValidCtxt* xmlSchematronNewValidCtxt(
+ xmlSchematron* schema, int options) nogil
- cdef xmlSchematron* xmlSchematronParse(xmlSchematronParserCtxt* ctxt)
+ cdef xmlSchematron* xmlSchematronParse(xmlSchematronParserCtxt* ctxt) nogil
cdef int xmlSchematronValidateDoc(xmlSchematronValidCtxt* ctxt,
- xmlDoc* instance)
+ xmlDoc* instance) nogil
- cdef void xmlSchematronFreeParserCtxt(xmlSchematronParserCtxt* ctxt)
- cdef void xmlSchematronFreeValidCtxt(xmlSchematronValidCtxt* ctxt)
- cdef void xmlSchematronFree(xmlSchematron* schema)
+ cdef void xmlSchematronFreeParserCtxt(xmlSchematronParserCtxt* ctxt) nogil
+ cdef void xmlSchematronFreeValidCtxt(xmlSchematronValidCtxt* ctxt) nogil
+ cdef void xmlSchematronFree(xmlSchematron* schema) nogil
Modified: lxml/trunk/src/lxml/schematron.pxi
==============================================================================
--- lxml/trunk/src/lxml/schematron.pxi (original)
+++ lxml/trunk/src/lxml/schematron.pxi Sun Dec 2 14:45:01 2007
@@ -117,7 +117,6 @@
"""Validate doc using Schematron.
Returns true if document is valid, false if not."""
- cdef python.PyThreadState* state
cdef _Document doc
cdef _Element root_node
cdef xmlDoc* c_doc
@@ -140,9 +139,8 @@
raise SchematronError, "Failed to create validation context"
c_doc = _fakeRootDoc(doc._c_doc, root_node._c_node)
- state = python.PyEval_SaveThread()
- ret = schematron.xmlSchematronValidateDoc(valid_ctxt, c_doc)
- python.PyEval_RestoreThread(state)
+ with nogil:
+ ret = schematron.xmlSchematronValidateDoc(valid_ctxt, c_doc)
_destroyFakeDoc(doc._c_doc, c_doc)
schematron.xmlSchematronFreeValidCtxt(valid_ctxt)
Modified: lxml/trunk/src/lxml/serializer.pxi
==============================================================================
--- lxml/trunk/src/lxml/serializer.pxi (original)
+++ lxml/trunk/src/lxml/serializer.pxi Sun Dec 2 14:45:01 2007
@@ -18,11 +18,9 @@
raise ValueError, "unknown output method %r" % method
cdef _textToString(xmlNode* c_node, encoding):
- cdef python.PyThreadState* state
cdef char* c_text
- state = python.PyEval_SaveThread()
- c_text = tree.xmlNodeGetContent(c_node)
- python.PyEval_RestoreThread(state)
+ with nogil:
+ c_text = tree.xmlNodeGetContent(c_node)
if c_text is NULL:
python.PyErr_NoMemory()
@@ -49,7 +47,6 @@
"""Serialize an element to an encoded string representation of its XML
tree.
"""
- cdef python.PyThreadState* state
cdef tree.xmlOutputBuffer* c_buffer
cdef tree.xmlBuffer* c_result_buffer
cdef tree.xmlCharEncodingHandler* enchandler
@@ -77,17 +74,17 @@
tree.xmlCharEncCloseFunc(enchandler)
raise LxmlError, "Failed to create output buffer"
- try:
- state = python.PyEval_SaveThread()
+ with nogil:
_writeNodeToBuffer(c_buffer, element._c_node, c_enc, c_method,
write_xml_declaration, write_complete_document,
pretty_print)
tree.xmlOutputBufferFlush(c_buffer)
- python.PyEval_RestoreThread(state)
if c_buffer.conv is not NULL:
c_result_buffer = c_buffer.conv
else:
c_result_buffer = c_buffer.buffer
+
+ try:
result = python.PyString_FromStringAndSize(
tree.xmlBufferContent(c_result_buffer),
tree.xmlBufferLength(c_result_buffer))
@@ -100,7 +97,6 @@
"""Serialize an element to the Python unicode representation of its XML
tree.
"""
- cdef python.PyThreadState* state
cdef tree.xmlOutputBuffer* c_buffer
cdef tree.xmlBuffer* c_result_buffer
cdef int c_method
@@ -113,16 +109,17 @@
c_buffer = tree.xmlAllocOutputBuffer(NULL)
if c_buffer is NULL:
raise LxmlError, "Failed to create output buffer"
- try:
- state = python.PyEval_SaveThread()
+
+ with nogil:
_writeNodeToBuffer(c_buffer, element._c_node, NULL, c_method, 0,
write_complete_document, pretty_print)
tree.xmlOutputBufferFlush(c_buffer)
- python.PyEval_RestoreThread(state)
if c_buffer.conv is not NULL:
c_result_buffer = c_buffer.conv
else:
c_result_buffer = c_buffer.buffer
+
+ try:
result = python.PyUnicode_DecodeUTF8(
tree.xmlBufferContent(c_result_buffer),
tree.xmlBufferLength(c_result_buffer),
@@ -135,7 +132,7 @@
xmlNode* c_node, char* encoding, int c_method,
bint write_xml_declaration,
bint write_complete_document,
- bint pretty_print):
+ bint pretty_print) nogil:
cdef xmlDoc* c_doc
cdef xmlNode* c_nsdecl_node
c_doc = c_node.doc
@@ -177,7 +174,7 @@
_writeNextSiblings(c_buffer, c_node, encoding, pretty_print)
cdef void _writeDeclarationToBuffer(tree.xmlOutputBuffer* c_buffer,
- char* version, char* encoding):
+ char* version, char* encoding) nogil:
if version is NULL:
version = "1.0"
tree.xmlOutputBufferWriteString(c_buffer, "\n")
cdef void _writeDtdToBuffer(tree.xmlOutputBuffer* c_buffer,
- xmlDoc* c_doc, char* c_root_name, char* encoding):
+ xmlDoc* c_doc, char* c_root_name,
+ char* encoding) nogil:
cdef tree.xmlDtd* c_dtd
cdef xmlNode* c_node
c_dtd = c_doc.intSubset
@@ -222,7 +220,7 @@
tree.xmlOutputBufferWrite(c_buffer, 3, "]>\n")
cdef void _writeTail(tree.xmlOutputBuffer* c_buffer, xmlNode* c_node,
- char* encoding, bint pretty_print):
+ char* encoding, bint pretty_print) nogil:
"Write the element tail."
c_node = c_node.next
while c_node is not NULL and c_node.type == tree.XML_TEXT_NODE:
@@ -231,7 +229,7 @@
c_node = c_node.next
cdef void _writePrevSiblings(tree.xmlOutputBuffer* c_buffer, xmlNode* c_node,
- char* encoding, bint pretty_print):
+ char* encoding, bint pretty_print) nogil:
cdef xmlNode* c_sibling
if c_node.parent is not NULL and _isElement(c_node.parent):
return
@@ -247,7 +245,7 @@
c_sibling = c_sibling.next
cdef void _writeNextSiblings(tree.xmlOutputBuffer* c_buffer, xmlNode* c_node,
- char* encoding, bint pretty_print):
+ char* encoding, bint pretty_print) nogil:
cdef xmlNode* c_sibling
if c_node.parent is not NULL and _isElement(c_node.parent):
return
@@ -358,7 +356,6 @@
writer._exc_context._raise_if_stored()
cdef _tofilelikeC14N(f, _Element element):
- cdef python.PyThreadState* state
cdef _FilelikeWriter writer
cdef tree.xmlOutputBuffer* c_buffer
cdef char* c_filename
@@ -372,9 +369,9 @@
if _isString(f):
filename8 = _encodeFilename(f)
c_filename = _cstr(filename8)
- state = python.PyEval_SaveThread()
- bytes = c14n.xmlC14NDocSave(c_doc, NULL, 0, NULL, 1, c_filename, 0)
- python.PyEval_RestoreThread(state)
+ with nogil:
+ bytes = c14n.xmlC14NDocSave(c_doc, NULL, 0, NULL, 1,
+ c_filename, 0)
elif hasattr(f, 'write'):
writer = _FilelikeWriter(f)
c_buffer = writer._createOutputBuffer(NULL)
Modified: lxml/trunk/src/lxml/tree.pxd
==============================================================================
--- lxml/trunk/src/lxml/tree.pxd (original)
+++ lxml/trunk/src/lxml/tree.pxd Sun Dec 2 14:45:01 2007
@@ -35,21 +35,22 @@
XML_CHAR_ENCODING_ASCII = 22 # pure ASCII
ctypedef struct xmlCharEncodingHandler
- cdef xmlCharEncodingHandler* xmlFindCharEncodingHandler(char* name)
- cdef xmlCharEncodingHandler* xmlGetCharEncodingHandler(xmlCharEncoding enc)
- cdef int xmlCharEncCloseFunc(xmlCharEncodingHandler* handler)
- cdef xmlCharEncoding xmlDetectCharEncoding(char* text, int len)
- cdef char* xmlGetCharEncodingName(xmlCharEncoding enc)
- cdef xmlCharEncoding xmlParseCharEncoding(char* name)
+ cdef xmlCharEncodingHandler* xmlFindCharEncodingHandler(char* name) nogil
+ cdef xmlCharEncodingHandler* xmlGetCharEncodingHandler(
+ xmlCharEncoding enc) nogil
+ cdef int xmlCharEncCloseFunc(xmlCharEncodingHandler* handler) nogil
+ cdef xmlCharEncoding xmlDetectCharEncoding(char* text, int len) nogil
+ cdef char* xmlGetCharEncodingName(xmlCharEncoding enc) nogil
+ cdef xmlCharEncoding xmlParseCharEncoding(char* name) nogil
cdef extern from "libxml/chvalid.h":
- cdef int xmlIsChar_ch(char c)
+ cdef int xmlIsChar_ch(char c) nogil
cdef extern from "libxml/hash.h":
ctypedef struct xmlHashTable
- ctypedef void xmlHashScanner(void* payload, void* data, char* name)
- void xmlHashScan(xmlHashTable* table, xmlHashScanner f, void* data)
- void* xmlHashLookup(xmlHashTable* table, char* name)
+ ctypedef void xmlHashScanner(void* payload, void* data, char* name) nogil
+ void xmlHashScan(xmlHashTable* table, xmlHashScanner f, void* data) nogil
+ void* xmlHashLookup(xmlHashTable* table, char* name) nogil
cdef extern from "libxml/tree.h":
@@ -167,106 +168,112 @@
xmlBuffer* buffer
xmlBuffer* conv
- cdef void xmlFreeDoc(xmlDoc* cur)
- cdef void xmlFreeDtd(xmlDtd* cur)
- cdef void xmlFreeNode(xmlNode* cur)
- cdef void xmlFreeNsList(xmlNs* ns)
- cdef void xmlFreeNs(xmlNs* ns)
- cdef void xmlFree(char* buf)
+ cdef void xmlFreeDoc(xmlDoc* cur) nogil
+ cdef void xmlFreeDtd(xmlDtd* cur) nogil
+ cdef void xmlFreeNode(xmlNode* cur) nogil
+ cdef void xmlFreeNsList(xmlNs* ns) nogil
+ cdef void xmlFreeNs(xmlNs* ns) nogil
+ cdef void xmlFree(char* buf) nogil
- cdef xmlNode* xmlNewNode(xmlNs* ns, char* name)
- cdef xmlNode* xmlNewDocText(xmlDoc* doc, char* content)
- cdef xmlNode* xmlNewDocComment(xmlDoc* doc, char* content)
- cdef xmlNode* xmlNewDocPI(xmlDoc* doc, char* name, char* content)
- cdef xmlNode* xmlNewReference(xmlDoc* doc, char* name)
- cdef xmlNs* xmlNewNs(xmlNode* node, char* href, char* prefix)
- cdef xmlNode* xmlAddChild(xmlNode* parent, xmlNode* cur)
- cdef xmlNode* xmlReplaceNode(xmlNode* old, xmlNode* cur)
- cdef xmlNode* xmlAddPrevSibling(xmlNode* cur, xmlNode* elem)
- cdef xmlNode* xmlAddNextSibling(xmlNode* cur, xmlNode* elem)
+ cdef xmlNode* xmlNewNode(xmlNs* ns, char* name) nogil
+ cdef xmlNode* xmlNewDocText(xmlDoc* doc, char* content) nogil
+ cdef xmlNode* xmlNewDocComment(xmlDoc* doc, char* content) nogil
+ cdef xmlNode* xmlNewDocPI(xmlDoc* doc, char* name, char* content) nogil
+ cdef xmlNode* xmlNewReference(xmlDoc* doc, char* name) nogil
+ cdef xmlNs* xmlNewNs(xmlNode* node, char* href, char* prefix) nogil
+ cdef xmlNode* xmlAddChild(xmlNode* parent, xmlNode* cur) nogil
+ cdef xmlNode* xmlReplaceNode(xmlNode* old, xmlNode* cur) nogil
+ cdef xmlNode* xmlAddPrevSibling(xmlNode* cur, xmlNode* elem) nogil
+ cdef xmlNode* xmlAddNextSibling(xmlNode* cur, xmlNode* elem) nogil
cdef xmlNode* xmlNewDocNode(xmlDoc* doc, xmlNs* ns,
- char* name, char* content)
- cdef xmlDoc* xmlNewDoc(char* version)
- cdef xmlAttr* xmlNewProp(xmlNode* node, char* name, char* value)
+ char* name, char* content) nogil
+ cdef xmlDoc* xmlNewDoc(char* version) nogil
+ cdef xmlAttr* xmlNewProp(xmlNode* node, char* name, char* value) nogil
cdef xmlAttr* xmlNewNsProp(xmlNode* node, xmlNs* ns,
- char* name, char* value)
- cdef char* xmlGetNoNsProp(xmlNode* node, char* name)
- cdef char* xmlGetNsProp(xmlNode* node, char* name, char* nameSpace)
- cdef void xmlSetNs(xmlNode* node, xmlNs* ns)
- cdef xmlAttr* xmlSetProp(xmlNode* node, char* name, char* value)
+ char* name, char* value) nogil
+ cdef char* xmlGetNoNsProp(xmlNode* node, char* name) nogil
+ cdef char* xmlGetNsProp(xmlNode* node, char* name, char* nameSpace) nogil
+ cdef void xmlSetNs(xmlNode* node, xmlNs* ns) nogil
+ cdef xmlAttr* xmlSetProp(xmlNode* node, char* name, char* value) nogil
cdef xmlAttr* xmlSetNsProp(xmlNode* node, xmlNs* ns,
- char* name, char* value)
- cdef int xmlRemoveProp(xmlAttr* cur)
- cdef char* xmlGetNodePath(xmlNode* node)
- cdef void xmlDocDumpMemory(xmlDoc* cur, char** mem, int* size)
+ char* name, char* value) nogil
+ cdef int xmlRemoveProp(xmlAttr* cur) nogil
+ cdef char* xmlGetNodePath(xmlNode* node) nogil
+ cdef void xmlDocDumpMemory(xmlDoc* cur, char** mem, int* size) nogil
cdef void xmlDocDumpMemoryEnc(xmlDoc* cur, char** mem, int* size,
- char* encoding)
- cdef int xmlSaveFileTo(xmlOutputBuffer* out, xmlDoc* cur, char* encoding)
-
- cdef void xmlUnlinkNode(xmlNode* cur)
- cdef xmlNode* xmlDocSetRootElement(xmlDoc* doc, xmlNode* root)
- cdef xmlNode* xmlDocGetRootElement(xmlDoc* doc)
- cdef void xmlSetTreeDoc(xmlNode* tree, xmlDoc* doc)
- cdef xmlAttr* xmlHasProp(xmlNode* node, char* name)
- cdef xmlAttr* xmlHasNsProp(xmlNode* node, char* name, char* nameSpace)
- cdef char* xmlNodeGetContent(xmlNode* cur)
- cdef xmlNs* xmlSearchNs(xmlDoc* doc, xmlNode* node, char* prefix)
- cdef xmlNs* xmlSearchNsByHref(xmlDoc* doc, xmlNode* node, char* href)
- cdef int xmlIsBlankNode(xmlNode* node)
- cdef long xmlGetLineNo(xmlNode* node)
- cdef void xmlElemDump(FILE* f, xmlDoc* doc, xmlNode* cur)
+ char* encoding) nogil
+ cdef int xmlSaveFileTo(xmlOutputBuffer* out, xmlDoc* cur,
+ char* encoding) nogil
+
+ cdef void xmlUnlinkNode(xmlNode* cur) nogil
+ cdef xmlNode* xmlDocSetRootElement(xmlDoc* doc, xmlNode* root) nogil
+ cdef xmlNode* xmlDocGetRootElement(xmlDoc* doc) nogil
+ cdef void xmlSetTreeDoc(xmlNode* tree, xmlDoc* doc) nogil
+ cdef xmlAttr* xmlHasProp(xmlNode* node, char* name) nogil
+ cdef xmlAttr* xmlHasNsProp(xmlNode* node, char* name, char* nameSpace) nogil
+ cdef char* xmlNodeGetContent(xmlNode* cur) nogil
+ cdef xmlNs* xmlSearchNs(xmlDoc* doc, xmlNode* node, char* prefix) nogil
+ cdef xmlNs* xmlSearchNsByHref(xmlDoc* doc, xmlNode* node, char* href) nogil
+ cdef int xmlIsBlankNode(xmlNode* node) nogil
+ cdef long xmlGetLineNo(xmlNode* node) nogil
+ cdef void xmlElemDump(FILE* f, xmlDoc* doc, xmlNode* cur) nogil
cdef void xmlNodeDumpOutput(xmlOutputBuffer* buf,
xmlDoc* doc, xmlNode* cur, int level,
- int format, char* encoding)
- cdef void xmlNodeSetName(xmlNode* cur, char* name)
- cdef void xmlNodeSetContent(xmlNode* cur, char* content)
- cdef xmlDtd* xmlCopyDtd(xmlDtd* dtd)
- cdef xmlDoc* xmlCopyDoc(xmlDoc* doc, int recursive)
- cdef xmlNode* xmlCopyNode(xmlNode* node, int extended)
- cdef xmlNode* xmlDocCopyNode(xmlNode* node, xmlDoc* doc, int extended)
- cdef int xmlReconciliateNs(xmlDoc* doc, xmlNode* tree)
- cdef xmlNs* xmlNewReconciliedNs(xmlDoc* doc, xmlNode* tree, xmlNs* ns)
- cdef xmlBuffer* xmlBufferCreate()
- cdef void xmlBufferFree(xmlBuffer* buf)
- cdef char* xmlBufferContent(xmlBuffer* buf)
- cdef int xmlBufferLength(xmlBuffer* buf)
- cdef int xmlKeepBlanksDefault(int val)
- cdef char* xmlNodeGetBase(xmlDoc* doc, xmlNode* node)
- cdef char* xmlBuildURI(char* href, char* base)
- cdef int xmlValidateNCName(char* value, int space)
+ int format, char* encoding) nogil
+ cdef void xmlNodeSetName(xmlNode* cur, char* name) nogil
+ cdef void xmlNodeSetContent(xmlNode* cur, char* content) nogil
+ cdef xmlDtd* xmlCopyDtd(xmlDtd* dtd) nogil
+ cdef xmlDoc* xmlCopyDoc(xmlDoc* doc, int recursive) nogil
+ cdef xmlNode* xmlCopyNode(xmlNode* node, int extended) nogil
+ cdef xmlNode* xmlDocCopyNode(xmlNode* node, xmlDoc* doc, int extended) nogil
+ cdef int xmlReconciliateNs(xmlDoc* doc, xmlNode* tree) nogil
+ cdef xmlNs* xmlNewReconciliedNs(xmlDoc* doc, xmlNode* tree, xmlNs* ns) nogil
+ cdef xmlBuffer* xmlBufferCreate() nogil
+ cdef void xmlBufferFree(xmlBuffer* buf) nogil
+ cdef char* xmlBufferContent(xmlBuffer* buf) nogil
+ cdef int xmlBufferLength(xmlBuffer* buf) nogil
+ cdef int xmlKeepBlanksDefault(int val) nogil
+ cdef char* xmlNodeGetBase(xmlDoc* doc, xmlNode* node) nogil
+ cdef char* xmlBuildURI(char* href, char* base) nogil
+ cdef int xmlValidateNCName(char* value, int space) nogil
cdef extern from "libxml/HTMLtree.h":
cdef void htmlNodeDumpFormatOutput(xmlOutputBuffer* buf,
xmlDoc* doc, xmlNode* cur,
- char* encoding, int format)
+ char* encoding, int format) nogil
cdef extern from "libxml/valid.h":
- cdef xmlAttr* xmlGetID(xmlDoc* doc, char* ID)
- cdef void xmlDumpNotationTable(xmlBuffer* buffer, xmlNotationTable* table)
+ cdef xmlAttr* xmlGetID(xmlDoc* doc, char* ID) nogil
+ cdef void xmlDumpNotationTable(xmlBuffer* buffer,
+ xmlNotationTable* table) nogil
cdef extern from "libxml/xmlIO.h":
- cdef void xmlBufferWriteQuotedString(xmlOutputBuffer* out, char* str)
- cdef int xmlOutputBufferWriteString(xmlOutputBuffer* out, char* str)
- cdef int xmlOutputBufferWrite(xmlOutputBuffer* out, int len, char* str)
- cdef int xmlOutputBufferFlush(xmlOutputBuffer* out)
- cdef int xmlOutputBufferClose(xmlOutputBuffer* out)
-
- ctypedef int (*xmlInputReadCallback)(void* context, char* buffer, int len)
- ctypedef int (*xmlInputCloseCallback)(void* context)
-
- ctypedef int (*xmlOutputWriteCallback)(void* context, char* buffer, int len)
- ctypedef int (*xmlOutputCloseCallback)(void* context)
+ cdef void xmlBufferWriteQuotedString(xmlOutputBuffer* out, char* str) nogil
+ cdef int xmlOutputBufferWriteString(xmlOutputBuffer* out, char* str) nogil
+ cdef int xmlOutputBufferWrite(xmlOutputBuffer* out,
+ int len, char* str) nogil
+ cdef int xmlOutputBufferFlush(xmlOutputBuffer* out) nogil
+ cdef int xmlOutputBufferClose(xmlOutputBuffer* out) nogil
+
+ ctypedef int (*xmlInputReadCallback)(void* context,
+ char* buffer, int len) nogil
+ ctypedef int (*xmlInputCloseCallback)(void* context) nogil
+
+ ctypedef int (*xmlOutputWriteCallback)(void* context,
+ char* buffer, int len) nogil
+ ctypedef int (*xmlOutputCloseCallback)(void* context) nogil
- cdef xmlOutputBuffer* xmlAllocOutputBuffer(xmlCharEncodingHandler* encoder)
+ cdef xmlOutputBuffer* xmlAllocOutputBuffer(
+ xmlCharEncodingHandler* encoder) nogil
cdef xmlOutputBuffer* xmlOutputBufferCreateIO(
xmlOutputWriteCallback iowrite,
xmlOutputCloseCallback ioclose,
void * ioctx,
- xmlCharEncodingHandler* encoder)
+ xmlCharEncodingHandler* encoder) nogil
cdef xmlOutputBuffer* xmlOutputBufferCreateFile(
- FILE* file, xmlCharEncodingHandler* encoder)
+ FILE* file, xmlCharEncodingHandler* encoder) nogil
cdef xmlOutputBuffer* xmlOutputBufferCreateFilename(
- char* URI, xmlCharEncodingHandler* encoder, int compression)
+ char* URI, xmlCharEncodingHandler* encoder, int compression) nogil
cdef extern from "libxml/xmlsave.h":
ctypedef struct xmlSaveCtxt
@@ -278,31 +285,32 @@
XML_SAVE_NO_XHTML = 8 # disable XHTML1 specific rules (2.6.22)
cdef xmlSaveCtxt* xmlSaveToFilename(char* filename, char* encoding,
- int options)
+ int options) nogil
cdef xmlSaveCtxt* xmlSaveToBuffer(xmlBuffer* buffer, char* encoding,
- int options) # libxml2 2.6.23
- cdef long xmlSaveDoc(xmlSaveCtxt* ctxt, xmlDoc* doc)
- cdef long xmlSaveTree(xmlSaveCtxt* ctxt, xmlNode* node)
- cdef int xmlSaveClose(xmlSaveCtxt* ctxt)
- cdef int xmlSaveFlush(xmlSaveCtxt* ctxt)
- cdef int xmlSaveSetAttrEscape(xmlSaveCtxt* ctxt, void* escape_func)
- cdef int xmlSaveSetEscape(xmlSaveCtxt* ctxt, void* escape_func)
+ int options) nogil # libxml2 2.6.23
+ cdef long xmlSaveDoc(xmlSaveCtxt* ctxt, xmlDoc* doc) nogil
+ cdef long xmlSaveTree(xmlSaveCtxt* ctxt, xmlNode* node) nogil
+ cdef int xmlSaveClose(xmlSaveCtxt* ctxt) nogil
+ cdef int xmlSaveFlush(xmlSaveCtxt* ctxt) nogil
+ cdef int xmlSaveSetAttrEscape(xmlSaveCtxt* ctxt, void* escape_func) nogil
+ cdef int xmlSaveSetEscape(xmlSaveCtxt* ctxt, void* escape_func) nogil
cdef extern from "libxml/globals.h":
- cdef int xmlThrDefKeepBlanksDefaultValue(int onoff)
- cdef int xmlThrDefLineNumbersDefaultValue(int onoff)
- cdef int xmlThrDefIndentTreeOutput(int onoff)
+ cdef int xmlThrDefKeepBlanksDefaultValue(int onoff) nogil
+ cdef int xmlThrDefLineNumbersDefaultValue(int onoff) nogil
+ cdef int xmlThrDefIndentTreeOutput(int onoff) nogil
cdef extern from "libxml/xmlstring.h":
- cdef char* xmlStrdup(char* cur)
+ cdef char* xmlStrdup(char* cur) nogil
cdef extern from "libxml/xmlmemory.h":
- cdef void* xmlMalloc(size_t size)
+ cdef void* xmlMalloc(size_t size) nogil
cdef extern from "etree_defs.h":
- cdef bint _isElement(xmlNode* node)
- cdef bint _isElementOrXInclude(xmlNode* node)
- cdef char* _getNs(xmlNode* node)
+ cdef bint _isElement(xmlNode* node) nogil
+ cdef bint _isElementOrXInclude(xmlNode* node) nogil
+ cdef char* _getNs(xmlNode* node) nogil
cdef void BEGIN_FOR_EACH_ELEMENT_FROM(xmlNode* tree_top,
- xmlNode* start_node, bint inclusive)
- cdef void END_FOR_EACH_ELEMENT_FROM(xmlNode* start_node)
+ xmlNode* start_node,
+ bint inclusive) nogil
+ cdef void END_FOR_EACH_ELEMENT_FROM(xmlNode* start_node) nogil
Modified: lxml/trunk/src/lxml/xinclude.pxd
==============================================================================
--- lxml/trunk/src/lxml/xinclude.pxd (original)
+++ lxml/trunk/src/lxml/xinclude.pxd Sun Dec 2 14:45:01 2007
@@ -4,14 +4,15 @@
ctypedef struct xmlXIncludeCtxt
- cdef int xmlXIncludeProcess(xmlDoc* doc)
- cdef int xmlXIncludeProcessFlags(xmlDoc* doc, int parser_opts)
- cdef int xmlXIncludeProcessTree(xmlNode* doc)
- cdef int xmlXIncludeProcessTreeFlags(xmlNode* doc, int parser_opts)
+ cdef int xmlXIncludeProcess(xmlDoc* doc) nogil
+ cdef int xmlXIncludeProcessFlags(xmlDoc* doc, int parser_opts) nogil
+ cdef int xmlXIncludeProcessTree(xmlNode* doc) nogil
+ cdef int xmlXIncludeProcessTreeFlags(xmlNode* doc, int parser_opts) nogil
- cdef xmlXIncludeCtxt* xmlXIncludeNewContext(xmlDoc* doc)
- cdef int xmlXIncludeProcessNode(xmlXIncludeCtxt* ctxt, xmlNode* node)
- cdef int xmlXIncludeSetFlags(xmlXIncludeCtxt* ctxt, int flags)
+ cdef xmlXIncludeCtxt* xmlXIncludeNewContext(xmlDoc* doc) nogil
+ cdef int xmlXIncludeProcessNode(xmlXIncludeCtxt* ctxt, xmlNode* node) nogil
+ cdef int xmlXIncludeSetFlags(xmlXIncludeCtxt* ctxt, int flags) nogil
# libxml2 >= 2.6.27
- cdef int xmlXIncludeProcessFlagsData(xmlDoc* doc, int flags, void* data)
+ cdef int xmlXIncludeProcessFlagsData(
+ xmlDoc* doc, int flags, void* data) nogil
Modified: lxml/trunk/src/lxml/xmlerror.pxd
==============================================================================
--- lxml/trunk/src/lxml/xmlerror.pxd (original)
+++ lxml/trunk/src/lxml/xmlerror.pxd Sun Dec 2 14:45:01 2007
@@ -783,14 +783,17 @@
int int1
int int2
- ctypedef void (*xmlGenericErrorFunc)(void* ctxt, char* msg, ...)
- ctypedef void (*xmlStructuredErrorFunc)(void* userData, xmlError* error)
+ ctypedef void (*xmlGenericErrorFunc)(void* ctxt, char* msg, ...) nogil
+ ctypedef void (*xmlStructuredErrorFunc)(void* userData,
+ xmlError* error) nogil
- cdef void xmlSetGenericErrorFunc(void* ctxt, xmlGenericErrorFunc func)
- cdef void xmlSetStructuredErrorFunc(void* ctxt, xmlStructuredErrorFunc func)
+ cdef void xmlSetGenericErrorFunc(
+ void* ctxt, xmlGenericErrorFunc func) nogil
+ cdef void xmlSetStructuredErrorFunc(
+ void* ctxt, xmlStructuredErrorFunc func) nogil
cdef extern from "libxml/globals.h":
- cdef void xmlThrDefSetGenericErrorFunc(void* ctx,
- xmlGenericErrorFunc handler)
- cdef void xmlThrDefSetStructuredErrorFunc(void* ctx,
- xmlStructuredErrorFunc handler)
+ cdef void xmlThrDefSetGenericErrorFunc(
+ void* ctx, xmlGenericErrorFunc handler) nogil
+ cdef void xmlThrDefSetStructuredErrorFunc(
+ void* ctx, xmlStructuredErrorFunc handler) nogil
Modified: lxml/trunk/src/lxml/xmlparser.pxd
==============================================================================
--- lxml/trunk/src/lxml/xmlparser.pxd (original)
+++ lxml/trunk/src/lxml/xmlparser.pxd Sun Dec 2 14:45:01 2007
@@ -65,14 +65,14 @@
int initialized
cdef extern from "libxml/xmlIO.h":
- cdef xmlParserInputBuffer* xmlAllocParserInputBuffer(int enc)
+ cdef xmlParserInputBuffer* xmlAllocParserInputBuffer(int enc) nogil
cdef extern from "libxml/parser.h":
- cdef xmlDict* xmlDictCreate()
- cdef xmlDict* xmlDictCreateSub(xmlDict* subdict)
- cdef void xmlDictFree(xmlDict* sub)
- cdef int xmlDictReference(xmlDict* dict)
+ cdef xmlDict* xmlDictCreate() nogil
+ cdef xmlDict* xmlDictCreateSub(xmlDict* subdict) nogil
+ cdef void xmlDictFree(xmlDict* sub) nogil
+ cdef int xmlDictReference(xmlDict* dict) nogil
cdef int XML_COMPLETE_ATTRS # SAX option for adding DTD default attributes
@@ -118,31 +118,34 @@
# libxml2 2.6.21+ only:
XML_PARSE_COMPACT = 65536 # compact small text nodes
- cdef void xmlInitParser()
- cdef int xmlLineNumbersDefault(int onoff)
- cdef xmlParserCtxt* xmlNewParserCtxt()
+ cdef void xmlInitParser() nogil
+ cdef int xmlLineNumbersDefault(int onoff) nogil
+ cdef xmlParserCtxt* xmlNewParserCtxt() nogil
cdef xmlParserInput* xmlNewIOInputStream(xmlParserCtxt* ctxt,
xmlParserInputBuffer* input,
- int enc)
- cdef int xmlCtxtUseOptions(xmlParserCtxt* ctxt, int options)
- cdef void xmlFreeParserCtxt(xmlParserCtxt* ctxt)
- cdef void xmlCtxtReset(xmlParserCtxt* ctxt)
- cdef void xmlClearParserCtxt(xmlParserCtxt* ctxt)
+ int enc) nogil
+ cdef int xmlCtxtUseOptions(xmlParserCtxt* ctxt, int options) nogil
+ cdef void xmlFreeParserCtxt(xmlParserCtxt* ctxt) nogil
+ cdef void xmlCtxtReset(xmlParserCtxt* ctxt) nogil
+ cdef void xmlClearParserCtxt(xmlParserCtxt* ctxt) nogil
cdef int xmlParseChunk(xmlParserCtxt* ctxt,
- char* chunk, int size, int terminate)
+ char* chunk, int size, int terminate) nogil
cdef xmlDoc* xmlCtxtReadDoc(xmlParserCtxt* ctxt,
char* cur, char* URL, char* encoding,
- int options)
+ int options) nogil
cdef xmlDoc* xmlCtxtReadFile(xmlParserCtxt* ctxt,
- char* filename, char* encoding, int options)
+ char* filename, char* encoding,
+ int options) nogil
cdef xmlDoc* xmlCtxtReadIO(xmlParserCtxt* ctxt,
xmlInputReadCallback ioread,
xmlInputCloseCallback ioclose,
void* ioctx,
- char* URL, char* encoding, int options)
+ char* URL, char* encoding,
+ int options) nogil
cdef xmlDoc* xmlCtxtReadMemory(xmlParserCtxt* ctxt,
char* buffer, int size,
- char* filename, char* encoding, int options)
+ char* filename, char* encoding,
+ int options) nogil
# iterparse:
@@ -150,33 +153,32 @@
void* user_data,
char* chunk,
int size,
- char* filename)
+ char* filename) nogil
cdef int xmlCtxtResetPush(xmlParserCtxt* ctxt,
char* chunk,
int size,
char* filename,
- char* encoding)
+ char* encoding) nogil
# entity loaders:
- ctypedef xmlParserInput* (*xmlExternalEntityLoader)(char * URL,
- char * ID,
- xmlParserCtxt* context)
- cdef xmlExternalEntityLoader xmlGetExternalEntityLoader()
- cdef void xmlSetExternalEntityLoader(xmlExternalEntityLoader f)
+ ctypedef xmlParserInput* (*xmlExternalEntityLoader)(
+ char * URL, char * ID, xmlParserCtxt* context) nogil
+ cdef xmlExternalEntityLoader xmlGetExternalEntityLoader() nogil
+ cdef void xmlSetExternalEntityLoader(xmlExternalEntityLoader f) nogil
# DTDs:
- cdef xmlDtd* xmlParseDTD(char* ExternalID, char* SystemID)
+ cdef xmlDtd* xmlParseDTD(char* ExternalID, char* SystemID) nogil
cdef xmlDtd* xmlIOParseDTD(xmlSAXHandler* sax,
xmlParserInputBuffer* input,
- int enc)
+ int enc) nogil
cdef extern from "libxml/parserInternals.h":
cdef xmlParserInput* xmlNewStringInputStream(xmlParserCtxt* ctxt,
- char* buffer)
+ char* buffer) nogil
cdef xmlParserInput* xmlNewInputFromFile(xmlParserCtxt* ctxt,
- char* filename)
- cdef void xmlFreeInputStream(xmlParserInput* input)
- cdef int xmlSwitchEncoding(xmlParserCtxt* ctxt, int enc)
+ char* filename) nogil
+ cdef void xmlFreeInputStream(xmlParserInput* input) nogil
+ cdef int xmlSwitchEncoding(xmlParserCtxt* ctxt, int enc) nogil
Modified: lxml/trunk/src/lxml/xmlschema.pxd
==============================================================================
--- lxml/trunk/src/lxml/xmlschema.pxd (original)
+++ lxml/trunk/src/lxml/xmlschema.pxd Sun Dec 2 14:45:01 2007
@@ -4,14 +4,14 @@
cdef extern from "libxml/xmlschemas.h":
ctypedef struct xmlSchema
ctypedef struct xmlSchemaParserCtxt
-
+
ctypedef struct xmlSchemaValidCtxt
-
- cdef xmlSchemaValidCtxt* xmlSchemaNewValidCtxt(xmlSchema* schema)
- cdef int xmlSchemaValidateDoc(xmlSchemaValidCtxt* ctxt, xmlDoc* doc)
- cdef xmlSchema* xmlSchemaParse(xmlSchemaParserCtxt* ctxt)
- cdef xmlSchemaParserCtxt* xmlSchemaNewParserCtxt(char* URL)
- cdef xmlSchemaParserCtxt* xmlSchemaNewDocParserCtxt(xmlDoc* doc)
- cdef void xmlSchemaFree(xmlSchema* schema)
- cdef void xmlSchemaFreeParserCtxt(xmlSchemaParserCtxt* ctxt)
- cdef void xmlSchemaFreeValidCtxt(xmlSchemaValidCtxt* ctxt)
+
+ cdef xmlSchemaValidCtxt* xmlSchemaNewValidCtxt(xmlSchema* schema) nogil
+ cdef int xmlSchemaValidateDoc(xmlSchemaValidCtxt* ctxt, xmlDoc* doc) nogil
+ cdef xmlSchema* xmlSchemaParse(xmlSchemaParserCtxt* ctxt) nogil
+ cdef xmlSchemaParserCtxt* xmlSchemaNewParserCtxt(char* URL) nogil
+ cdef xmlSchemaParserCtxt* xmlSchemaNewDocParserCtxt(xmlDoc* doc) nogil
+ cdef void xmlSchemaFree(xmlSchema* schema) nogil
+ cdef void xmlSchemaFreeParserCtxt(xmlSchemaParserCtxt* ctxt) nogil
+ cdef void xmlSchemaFreeValidCtxt(xmlSchemaValidCtxt* ctxt) nogil
Modified: lxml/trunk/src/lxml/xmlschema.pxi
==============================================================================
--- lxml/trunk/src/lxml/xmlschema.pxi (original)
+++ lxml/trunk/src/lxml/xmlschema.pxi Sun Dec 2 14:45:01 2007
@@ -81,7 +81,6 @@
Returns true if document is valid, false if not.
"""
- cdef python.PyThreadState* state
cdef xmlschema.xmlSchemaValidCtxt* valid_ctxt
cdef _Document doc
cdef _Element root_node
@@ -98,9 +97,8 @@
raise XMLSchemaError, "Failed to create validation context"
c_doc = _fakeRootDoc(doc._c_doc, root_node._c_node)
- state = python.PyEval_SaveThread()
- ret = xmlschema.xmlSchemaValidateDoc(valid_ctxt, c_doc)
- python.PyEval_RestoreThread(state)
+ with nogil:
+ ret = xmlschema.xmlSchemaValidateDoc(valid_ctxt, c_doc)
_destroyFakeDoc(doc._c_doc, c_doc)
xmlschema.xmlSchemaFreeValidCtxt(valid_ctxt)
Modified: lxml/trunk/src/lxml/xpath.pxd
==============================================================================
--- lxml/trunk/src/lxml/xpath.pxd (original)
+++ lxml/trunk/src/lxml/xpath.pxd Sun Dec 2 14:45:01 2007
@@ -68,66 +68,65 @@
tree.xmlNode* ancestor
int error
- ctypedef struct xmlXPathCompExpr:
- pass
+ ctypedef struct xmlXPathCompExpr
ctypedef void (*xmlXPathFunction)(xmlXPathParserContext* ctxt, int nargs)
ctypedef xmlXPathFunction (*xmlXPathFuncLookupFunc)(void* ctxt,
char* name,
char* ns_uri)
- cdef xmlXPathContext* xmlXPathNewContext(tree.xmlDoc* doc)
+ cdef xmlXPathContext* xmlXPathNewContext(tree.xmlDoc* doc) nogil
cdef xmlXPathObject* xmlXPathEvalExpression(char* str,
- xmlXPathContext* ctxt)
+ xmlXPathContext* ctxt) nogil
cdef xmlXPathObject* xmlXPathCompiledEval(xmlXPathCompExpr* comp,
- xmlXPathContext* ctxt)
- cdef xmlXPathCompExpr* xmlXPathCompile(char* str)
+ xmlXPathContext* ctxt) nogil
+ cdef xmlXPathCompExpr* xmlXPathCompile(char* str) nogil
cdef xmlXPathCompExpr* xmlXPathCtxtCompile(xmlXPathContext* ctxt,
- char* str)
- cdef void xmlXPathFreeContext(xmlXPathContext* ctxt)
- cdef void xmlXPathFreeCompExpr(xmlXPathCompExpr* comp)
- cdef void xmlXPathFreeObject(xmlXPathObject* obj)
+ char* str) nogil
+ cdef void xmlXPathFreeContext(xmlXPathContext* ctxt) nogil
+ cdef void xmlXPathFreeCompExpr(xmlXPathCompExpr* comp) nogil
+ cdef void xmlXPathFreeObject(xmlXPathObject* obj) nogil
cdef int xmlXPathRegisterNs(xmlXPathContext* ctxt,
- char* prefix, char* ns_uri)
+ char* prefix, char* ns_uri) nogil
- cdef xmlNodeSet* xmlXPathNodeSetCreate(tree.xmlNode* val)
- cdef void xmlXPathFreeNodeSet(xmlNodeSet* val)
+ cdef xmlNodeSet* xmlXPathNodeSetCreate(tree.xmlNode* val) nogil
+ cdef void xmlXPathFreeNodeSet(xmlNodeSet* val) nogil
cdef extern from "libxml/xpathInternals.h":
cdef int xmlXPathRegisterFunc(xmlXPathContext* ctxt,
char* name,
- xmlXPathFunction f)
+ xmlXPathFunction f) nogil
cdef int xmlXPathRegisterFuncNS(xmlXPathContext* ctxt,
char* name,
char* ns_uri,
- xmlXPathFunction f)
+ xmlXPathFunction f) nogil
cdef void xmlXPathRegisterFuncLookup(xmlXPathContext *ctxt,
xmlXPathFuncLookupFunc f,
- void *funcCtxt)
+ void *funcCtxt) nogil
cdef int xmlXPathRegisterVariable(xmlXPathContext *ctxt,
char* name,
- xmlXPathObject* value)
+ xmlXPathObject* value) nogil
cdef int xmlXPathRegisterVariableNS(xmlXPathContext *ctxt,
char* name,
char* ns_uri,
- xmlXPathObject* value)
- cdef void xmlXPathRegisteredVariablesCleanup(xmlXPathContext *ctxt)
- cdef void xmlXPathRegisteredNsCleanup(xmlXPathContext *ctxt)
- cdef xmlXPathObject* valuePop (xmlXPathParserContext *ctxt)
- cdef int valuePush(xmlXPathParserContext* ctxt, xmlXPathObject *value)
+ xmlXPathObject* value) nogil
+ cdef void xmlXPathRegisteredVariablesCleanup(xmlXPathContext *ctxt) nogil
+ cdef void xmlXPathRegisteredNsCleanup(xmlXPathContext *ctxt) nogil
+ cdef xmlXPathObject* valuePop (xmlXPathParserContext *ctxt) nogil
+ cdef int valuePush(xmlXPathParserContext* ctxt, xmlXPathObject *value) nogil
- cdef xmlXPathObject* xmlXPathNewCString(char *val)
- cdef xmlXPathObject* xmlXPathWrapCString(char * val)
- cdef xmlXPathObject* xmlXPathNewString(char *val)
- cdef xmlXPathObject* xmlXPathWrapString(char * val)
- cdef xmlXPathObject* xmlXPathNewFloat(double val)
- cdef xmlXPathObject* xmlXPathNewBoolean(int val)
- cdef xmlXPathObject* xmlXPathNewNodeSet(tree.xmlNode* val)
- cdef xmlXPathObject* xmlXPathNewValueTree(tree.xmlNode* val)
+ cdef xmlXPathObject* xmlXPathNewCString(char *val) nogil
+ cdef xmlXPathObject* xmlXPathWrapCString(char * val) nogil
+ cdef xmlXPathObject* xmlXPathNewString(char *val) nogil
+ cdef xmlXPathObject* xmlXPathWrapString(char * val) nogil
+ cdef xmlXPathObject* xmlXPathNewFloat(double val) nogil
+ cdef xmlXPathObject* xmlXPathNewBoolean(int val) nogil
+ cdef xmlXPathObject* xmlXPathNewNodeSet(tree.xmlNode* val) nogil
+ cdef xmlXPathObject* xmlXPathNewValueTree(tree.xmlNode* val) nogil
cdef void xmlXPathNodeSetAdd(xmlNodeSet* cur,
- tree.xmlNode* val)
+ tree.xmlNode* val) nogil
cdef void xmlXPathNodeSetAddUnique(xmlNodeSet* cur,
- tree.xmlNode* val)
- cdef xmlXPathObject* xmlXPathWrapNodeSet(xmlNodeSet* val)
- cdef void xmlXPathErr(xmlXPathParserContext* ctxt, int error)
+ tree.xmlNode* val) nogil
+ cdef xmlXPathObject* xmlXPathWrapNodeSet(xmlNodeSet* val) nogil
+ cdef void xmlXPathErr(xmlXPathParserContext* ctxt, int error) nogil
Modified: lxml/trunk/src/lxml/xpath.pxi
==============================================================================
--- lxml/trunk/src/lxml/xpath.pxi (original)
+++ lxml/trunk/src/lxml/xpath.pxi Sun Dec 2 14:45:01 2007
@@ -248,7 +248,6 @@
Absolute XPath expressions (starting with '/') will be evaluated
against the ElementTree as returned by getroottree().
"""
- cdef python.PyThreadState* state
cdef xpath.xmlXPathObject* xpathObj
cdef _Document doc
cdef char* c_path
@@ -261,10 +260,10 @@
try:
self._context.register_context(doc)
self._context.registerVariables(_variables)
- state = python.PyEval_SaveThread()
- xpathObj = xpath.xmlXPathEvalExpression(
- _cstr(path), self._xpathCtxt)
- python.PyEval_RestoreThread(state)
+ c_path = _cstr(path)
+ with nogil:
+ xpathObj = xpath.xmlXPathEvalExpression(
+ c_path, self._xpathCtxt)
result = self._handle_result(xpathObj, doc)
finally:
self._error_log.disconnect()
@@ -292,10 +291,10 @@
Variables may be provided as keyword arguments. Note that namespaces
are currently not supported for variables.
"""
- cdef python.PyThreadState* state
cdef xpath.xmlXPathObject* xpathObj
cdef xmlDoc* c_doc
cdef _Document doc
+ cdef char* c_path
path = _utf8(_path)
doc = self._element._doc
@@ -306,12 +305,12 @@
c_doc = _fakeRootDoc(doc._c_doc, self._element._c_node)
try:
self._context.registerVariables(_variables)
- state = python.PyEval_SaveThread()
- self._xpathCtxt.doc = c_doc
- self._xpathCtxt.node = tree.xmlDocGetRootElement(c_doc)
- xpathObj = xpath.xmlXPathEvalExpression(
- _cstr(path), self._xpathCtxt)
- python.PyEval_RestoreThread(state)
+ c_path = _cstr(path)
+ with nogil:
+ self._xpathCtxt.doc = c_doc
+ self._xpathCtxt.node = tree.xmlDocGetRootElement(c_doc)
+ xpathObj = xpath.xmlXPathEvalExpression(
+ c_path, self._xpathCtxt)
result = self._handle_result(xpathObj, doc)
finally:
_destroyFakeDoc(doc._c_doc, c_doc)
@@ -370,7 +369,6 @@
self._raise_parse_error()
def __call__(self, _etree_or_element, **_variables):
- cdef python.PyThreadState* state
cdef xpath.xmlXPathObject* xpathObj
cdef _Document document
cdef _Element element
@@ -387,10 +385,9 @@
try:
self._context.register_context(document)
self._context.registerVariables(_variables)
- state = python.PyEval_SaveThread()
- xpathObj = xpath.xmlXPathCompiledEval(
- self._xpath, self._xpathCtxt)
- python.PyEval_RestoreThread(state)
+ with nogil:
+ xpathObj = xpath.xmlXPathCompiledEval(
+ self._xpath, self._xpathCtxt)
result = self._handle_result(xpathObj, document)
finally:
self._error_log.disconnect()
Modified: lxml/trunk/src/lxml/xslt.pxd
==============================================================================
--- lxml/trunk/src/lxml/xslt.pxd (original)
+++ lxml/trunk/src/lxml/xslt.pxd Sun Dec 2 14:45:01 2007
@@ -23,20 +23,21 @@
xmlDict* dict
int profile
- cdef xsltStylesheet* xsltParseStylesheetDoc(xmlDoc* doc)
- cdef void xsltFreeStylesheet(xsltStylesheet* sheet)
+ cdef xsltStylesheet* xsltParseStylesheetDoc(xmlDoc* doc) nogil
+ cdef void xsltFreeStylesheet(xsltStylesheet* sheet) nogil
cdef extern from "libxslt/extensions.h":
cdef int xsltRegisterExtFunction(xsltTransformContext* ctxt,
char* name,
char* URI,
- xmlXPathFunction function)
+ xmlXPathFunction function) nogil
cdef int xsltRegisterExtModuleFunction(char* name, char* URI,
- xmlXPathFunction function)
+ xmlXPathFunction function) nogil
cdef int xsltUnregisterExtModuleFunction(char* name, char* URI)
- cdef xmlXPathFunction xsltExtModuleFunctionLookup(char* name, char* URI)
+ cdef xmlXPathFunction xsltExtModuleFunctionLookup(
+ char* name, char* URI) nogil
cdef int xsltRegisterExtPrefix(xsltStylesheet* style,
- char* prefix, char* URI)
+ char* prefix, char* URI) nogil
cdef extern from "libxslt/documents.h":
ctypedef enum xsltLoadType:
@@ -49,30 +50,30 @@
void* ctxt,
xsltLoadType type)
cdef xsltDocLoaderFunc xsltDocDefaultLoader
- cdef void xsltSetLoaderFunc(xsltDocLoaderFunc f)
+ cdef void xsltSetLoaderFunc(xsltDocLoaderFunc f) nogil
cdef extern from "libxslt/transform.h":
cdef xmlDoc* xsltApplyStylesheet(xsltStylesheet* style, xmlDoc* doc,
- char** params)
+ char** params) nogil
cdef xmlDoc* xsltApplyStylesheetUser(xsltStylesheet* style, xmlDoc* doc,
char** params, char* output,
void* profile,
- xsltTransformContext* context)
+ xsltTransformContext* context) nogil
cdef xsltTransformContext* xsltNewTransformContext(xsltStylesheet* style,
- xmlDoc* doc)
- cdef void xsltFreeTransformContext(xsltTransformContext* context)
+ xmlDoc* doc) nogil
+ cdef void xsltFreeTransformContext(xsltTransformContext* context) nogil
cdef extern from "libxslt/xsltutils.h":
cdef int xsltSaveResultToString(char** doc_txt_ptr,
int* doc_txt_len,
xmlDoc* result,
- xsltStylesheet* style)
+ xsltStylesheet* style) nogil
- cdef void xsltSetGenericErrorFunc(void* ctxt,
- void (*handler)(void* ctxt, char* msg, ...))
- cdef void xsltSetTransformErrorFunc(xsltTransformContext*,
- void* ctxt,
- void (*handler)(void* ctxt, char* msg, ...))
+ cdef void xsltSetGenericErrorFunc(
+ void* ctxt, void (*handler)(void* ctxt, char* msg, ...)) nogil
+ cdef void xsltSetTransformErrorFunc(
+ xsltTransformContext*, void* ctxt,
+ void (*handler)(void* ctxt, char* msg, ...)) nogil
cdef extern from "libxslt/security.h":
ctypedef struct xsltSecurityPrefs
@@ -87,20 +88,20 @@
xsltTransformContext* ctxt,
char* value)
- cdef xsltSecurityPrefs* xsltNewSecurityPrefs()
- cdef void xsltFreeSecurityPrefs(xsltSecurityPrefs* sec)
+ cdef xsltSecurityPrefs* xsltNewSecurityPrefs() nogil
+ cdef void xsltFreeSecurityPrefs(xsltSecurityPrefs* sec) nogil
cdef int xsltSecurityForbid(xsltSecurityPrefs* sec,
xsltTransformContext* ctxt,
- char* value)
+ char* value) nogil
cdef int xsltSecurityAllow(xsltSecurityPrefs* sec,
xsltTransformContext* ctxt,
- char* value)
+ char* value) nogil
cdef int xsltSetSecurityPrefs(xsltSecurityPrefs* sec,
xsltSecurityOption option,
- xsltSecurityCheck func)
+ xsltSecurityCheck func) nogil
cdef int xsltSetCtxtSecurityPrefs(xsltSecurityPrefs* sec,
- xsltTransformContext* ctxt)
- cdef xmlDoc* xsltGetProfileInformation(xsltTransformContext* ctxt)
+ xsltTransformContext* ctxt) nogil
+ cdef xmlDoc* xsltGetProfileInformation(xsltTransformContext* ctxt) nogil
cdef extern from "libxslt/extra.h":
cdef char* XSLT_LIBXSLT_NAMESPACE
@@ -109,7 +110,7 @@
cdef char* XSLT_XT_NAMESPACE
cdef xmlXPathFunction xsltFunctionNodeSet
- cdef void xsltRegisterAllExtras()
+ cdef void xsltRegisterAllExtras() nogil
cdef extern from "libexslt/exslt.h":
- cdef void exsltRegisterAll()
+ cdef void exsltRegisterAll() nogil
Modified: lxml/trunk/src/lxml/xslt.pxi
==============================================================================
--- lxml/trunk/src/lxml/xslt.pxi (original)
+++ lxml/trunk/src/lxml/xslt.pxi Sun Dec 2 14:45:01 2007
@@ -271,7 +271,6 @@
def __init__(self, xslt_input, extensions=None, regexp=True,
access_control=None):
- cdef python.PyThreadState* state
cdef xslt.xsltStylesheet* c_style
cdef xmlDoc* c_doc
cdef xmlDoc* fake_c_doc
@@ -301,9 +300,8 @@
c_doc._private = self._xslt_resolver_context
self._error_log.connect()
- state = python.PyEval_SaveThread()
- c_style = xslt.xsltParseStylesheetDoc(c_doc)
- python.PyEval_RestoreThread(state)
+ with nogil:
+ c_style = xslt.xsltParseStylesheetDoc(c_doc)
self._error_log.disconnect()
if c_style is NULL:
@@ -443,7 +441,6 @@
cdef xmlDoc* _run_transform(self, _Document input_doc, xmlDoc* c_input_doc,
parameters, _XSLTContext context,
xslt.xsltTransformContext* transform_ctxt):
- cdef python.PyThreadState* state
cdef xmlDoc* c_result
cdef char** params
cdef Py_ssize_t i, parameter_count
@@ -480,10 +477,9 @@
else:
params = NULL
- state = python.PyEval_SaveThread()
- c_result = xslt.xsltApplyStylesheetUser(
- self._c_style, c_input_doc, params, NULL, NULL, transform_ctxt)
- python.PyEval_RestoreThread(state)
+ with nogil:
+ c_result = xslt.xsltApplyStylesheetUser(
+ self._c_style, c_input_doc, params, NULL, NULL, transform_ctxt)
if params is not NULL:
# deallocate space for parameters
@@ -499,7 +495,6 @@
cdef XSLT _xslt
cdef _Document _profile
cdef _saveToStringAndSize(self, char** s, int* l):
- cdef python.PyThreadState* state
cdef _Document doc
cdef int r
if self._context_node is not None:
@@ -509,9 +504,9 @@
if doc is None:
s[0] = NULL
return
- state = python.PyEval_SaveThread()
- r = xslt.xsltSaveResultToString(s, l, doc._c_doc, self._xslt._c_style)
- python.PyEval_RestoreThread(state)
+ with nogil:
+ r = xslt.xsltSaveResultToString(s, l, doc._c_doc,
+ self._xslt._c_style)
if r == -1:
raise XSLTSaveError, "Error saving XSLT result to string"
From scoder at codespeak.net Wed Dec 5 19:30:47 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Wed, 5 Dec 2007 19:30:47 +0100 (CET)
Subject: [Lxml-checkins] r49400 - in lxml/trunk: . src/lxml src/lxml/tests
Message-ID: <20071205183047.C59DA1684CC@codespeak.net>
Author: scoder
Date: Wed Dec 5 19:30:47 2007
New Revision: 49400
Modified:
lxml/trunk/TODO.txt
lxml/trunk/selftest.py
lxml/trunk/src/lxml/parser.pxi
lxml/trunk/src/lxml/tests/test_elementtree.py
lxml/trunk/src/lxml/xmlerror.pxi
lxml/trunk/src/lxml/xslt.pxi
Log:
properties 'position' and 'code' on ParseError exceptions, small fixes to XSLT error reporting
Modified: lxml/trunk/TODO.txt
==============================================================================
--- lxml/trunk/TODO.txt (original)
+++ lxml/trunk/TODO.txt Wed Dec 5 19:30:47 2007
@@ -54,6 +54,6 @@
* clean support for entities (is the Entity element class enough?)
-* implement 'position' property on ParseError exception
-
-* rewrite iterparse() to accept a parser as argument instead of being one
+* rewrite iterparse() to accept a parser as argument instead of being
+ one (or maybe not: iterparse() can't deal with all parser options
+ anyway).
Modified: lxml/trunk/selftest.py
==============================================================================
--- lxml/trunk/selftest.py (original)
+++ lxml/trunk/selftest.py Wed Dec 5 19:30:47 2007
@@ -653,7 +653,7 @@
except ET.ParseError:
return sys.exc_value
-# doesn't work with lxml.etree
+# doesn't work with lxml.etree -> different positions
del error
def namespace():
Modified: lxml/trunk/src/lxml/parser.pxi
==============================================================================
--- lxml/trunk/src/lxml/parser.pxi (original)
+++ lxml/trunk/src/lxml/parser.pxi Wed Dec 5 19:30:47 2007
@@ -13,7 +13,10 @@
class XMLSyntaxError(ParseError):
"""Syntax error while parsing an XML document.
"""
- pass
+ def __init__(self, message, code, line, column):
+ ParseError.__init__(self, message)
+ self.position = (line, column)
+ self.code = code
class ParserError(LxmlError):
"""Internal lxml parser error.
@@ -449,7 +452,6 @@
context._initParserContext(c_ctxt)
context._error_log = _ErrorLog()
-
cdef int _raiseParseError(xmlparser.xmlParserCtxt* ctxt, filename,
_ErrorLog error_log) except 0:
if filename is not None and \
@@ -458,18 +460,21 @@
message = "Error reading file '%s': %s" % (
filename, (ctxt.lastError.message).strip())
else:
- message = "Error reading file '%s'" % filename
- raise IOError, message
+ message = "Error reading '%s'" % filename
+ raise IOError(message)
elif error_log:
- raise XMLSyntaxError, error_log._buildExceptionMessage(
- "Document is not well formed")
+ raise error_log._buildParseException(
+ XMLSyntaxError, "Document is not well formed")
elif ctxt.lastError.message is not NULL:
message = (ctxt.lastError.message).strip()
+ code = ctxt.lastError.code
+ line = ctxt.lastError.line
+ column = ctxt.lastError.int2
if ctxt.lastError.line > 0:
- message = "line %d: %s" % (ctxt.lastError.line, message)
- raise XMLSyntaxError, message
+ message = "line %d: %s" % (line, message)
+ raise XMLSyntaxError(message, code, line, column)
else:
- raise XMLSyntaxError
+ raise XMLSyntaxError(None, xmlerror.XML_ERR_INTERNAL_ERROR, 0, 0)
cdef xmlDoc* _handleParseResult(_ParserContext context,
xmlparser.xmlParserCtxt* c_ctxt,
@@ -931,7 +936,8 @@
cdef xmlDoc* c_doc
cdef _Document doc
if not self._feed_parser_running:
- raise XMLSyntaxError, "no element found"
+ raise XMLSyntaxError("no element found",
+ xmlerror.XML_ERR_INTERNAL_ERROR, 0, 0)
context = self._getPushParserContext()
pctxt = context._c_ctxt
Modified: lxml/trunk/src/lxml/tests/test_elementtree.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_elementtree.py (original)
+++ lxml/trunk/src/lxml/tests/test_elementtree.py Wed Dec 5 19:30:47 2007
@@ -3008,6 +3008,17 @@
self.assertRaises(ParseError, parser.close)
+ def test_feed_parser_error_position(self):
+ ParseError = self.etree.ParseError
+ parser = self.etree.XMLParser()
+ try:
+ parser.close()
+ except ParseError, e:
+ self.assertNotEquals(None, e.code)
+ self.assertNotEquals(0, e.code)
+ self.assert_(isinstance(e.position, tuple))
+ self.assert_(e.position >= (0, 0))
+
# parser target interface
def test_parser_target_tag(self):
Modified: lxml/trunk/src/lxml/xmlerror.pxi
==============================================================================
--- lxml/trunk/src/lxml/xmlerror.pxi (original)
+++ lxml/trunk/src/lxml/xmlerror.pxi Wed Dec 5 19:30:47 2007
@@ -128,6 +128,28 @@
if is_error:
self.last_error = entry
+ cdef _buildParseException(self, exctype, default_message):
+ code = xmlerror.XML_ERR_INTERNAL_ERROR
+ if self._first_error is None:
+ return exctype(default_message, code, 0, 0)
+ if self._first_error is None or \
+ self._first_error.message is None or \
+ not self._first_error.message:
+ message = default_message
+ line = 0
+ column = 0
+ else:
+ message = self._first_error.message
+ code = self._first_error.type
+ line = self._first_error.line
+ column = self._first_error.column
+ if line > 0:
+ if column > 0:
+ message = "%s, line %d, column %d" % (message, line, column)
+ else:
+ message = "%s, line %d" % (message, line)
+ return exctype(message, code, line, column)
+
cdef _buildExceptionMessage(self, default_message):
if self._first_error is None:
return default_message
@@ -393,7 +415,9 @@
cdef char* c_message
cdef char* c_element
cdef int i, text_size, element_size
- if __DEBUG == 0 or msg is NULL or msg[0] == c'\n':
+ if __DEBUG == 0 or msg is NULL:
+ return
+ if msg[0] == c'\n' or msg[0] == c'\0':
return
cstd.va_start(args, msg)
@@ -401,18 +425,19 @@
c_text = cstd.va_charptr(args)
else:
c_text = NULL
- if cstd.strstr(msg, 'file %s') is not NULL:
+ if cstd.strstr(msg, 'file %s'):
c_error.file = cstd.va_charptr(args)
- if c_error.file is not NULL and cstd.strlen(c_error.file) > 0:
- if cstd.strncmp(c_error.file, 'XSLT:', 5) == 0:
- c_error.file = ''
+ if c_error.file and \
+ cstd.strncmp(c_error.file,
+ 'string://__STRING__XSLT', 23) == 0:
+ c_error.file = ''
else:
c_error.file = NULL
- if cstd.strstr(msg, 'line %d') is not NULL:
+ if cstd.strstr(msg, 'line %d'):
c_error.line = cstd.va_int(args)
else:
c_error.line = -1
- if cstd.strstr(msg, 'element %s') is not NULL:
+ if cstd.strstr(msg, 'element %s'):
c_element = cstd.va_charptr(args)
else:
c_element = NULL
@@ -420,7 +445,17 @@
c_message = NULL
if c_text is NULL:
- c_error.message = ''
+ if c_element is not NULL and \
+ cstd.strchr(msg, c'%') == cstd.strrchr(msg, c'%'):
+ # special case: a single occurrence of 'element %s'
+ text_size = cstd.strlen(msg)
+ element_size = cstd.strlen(c_element)
+ c_message = cstd.malloc(
+ (text_size + element_size + 1) * sizeof(char))
+ cstd.sprintf(c_message, msg, c_element)
+ c_error.message = c_message
+ else:
+ c_error.message = ''
elif c_element is NULL:
c_error.message = c_text
else:
@@ -439,8 +474,7 @@
_forwardError(c_log_handler, &c_error)
if c_message is not NULL:
- cstd.free(c_error.message)
-
+ cstd.free(c_message)
################################################################################
## CONSTANTS FROM "xmlerror.h" (or rather libxml-xmlerror.html)
Modified: lxml/trunk/src/lxml/xslt.pxi
==============================================================================
--- lxml/trunk/src/lxml/xslt.pxi (original)
+++ lxml/trunk/src/lxml/xslt.pxi Wed Dec 5 19:30:47 2007
@@ -273,10 +273,8 @@
access_control=None):
cdef xslt.xsltStylesheet* c_style
cdef xmlDoc* c_doc
- cdef xmlDoc* fake_c_doc
cdef _Document doc
cdef _Element root_node
- cdef _ExsltRegExp _regexp
doc = _documentOrRaise(xslt_input)
root_node = _rootNodeOrRaise(xslt_input)
@@ -308,10 +306,13 @@
tree.xmlFreeDoc(c_doc)
self._xslt_resolver_context._raise_if_stored()
# last error seems to be the most accurate here
- if self._error_log.last_error is not None:
- raise XSLTParseError, self._error_log.last_error.message
+ if self._error_log.last_error is not None and \
+ self._error_log.last_error.message:
+ raise XSLTParseError(self._error_log.last_error.message)
else:
- raise XSLTParseError, "Cannot parse stylesheet"
+ raise XSLTParseError(
+ self._error_log._buildExceptionMessage(
+ "Cannot parse stylesheet"))
c_doc._private = NULL # no longer used!
self._c_style = c_style
From scoder at codespeak.net Wed Dec 5 19:31:25 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Wed, 5 Dec 2007 19:31:25 +0100 (CET)
Subject: [Lxml-checkins] r49401 - lxml/trunk/src/lxml/tests
Message-ID: <20071205183125.8DD901684CC@codespeak.net>
Author: scoder
Date: Wed Dec 5 19:31:25 2007
New Revision: 49401
Modified:
lxml/trunk/src/lxml/tests/test_xslt.py
Log:
additional XSLT doc resolver test case
Modified: lxml/trunk/src/lxml/tests/test_xslt.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_xslt.py (original)
+++ lxml/trunk/src/lxml/tests/test_xslt.py Wed Dec 5 19:31:25 2007
@@ -622,7 +622,6 @@
'{http://www.w3.org/1999/XSL/Transform}stylesheet')
def test_xslt_document_error(self):
- # make sure document('') works from parsed strings
xslt = etree.XSLT(etree.XML("""\
@@ -633,6 +632,68 @@
"""))
self.assertRaises(etree.XSLTApplyError, xslt, etree.XML(''))
+ def test_xslt_document_XML_resolver(self):
+ # make sure document('') works when custom resolvers are in use
+ assertEquals = self.assertEquals
+ called = {'count' : 0}
+ class TestResolver(etree.Resolver):
+ def resolve(self, url, id, context):
+ assertEquals(url, 'file://ANYTHING')
+ called['count'] += 1
+ return self.resolve_string('', context)
+
+ parser = etree.XMLParser()
+ parser.resolvers.add(TestResolver())
+
+ xslt = etree.XSLT(etree.XML("""\
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ A
+ B
+
+
+""", parser))
+
+ self.assertEquals(called['count'], 0)
+ result = xslt(etree.XML(''))
+ self.assertEquals(called['count'], 1)
+
+ root = result.getroot()
+ self.assertEquals(root.tag,
+ 'test')
+ self.assertEquals(len(root), 4)
+
+ self.assertEquals(root[0].tag,
+ 'CALLED')
+ self.assertEquals(root[1].tag,
+ '{local}entry')
+ self.assertEquals(root[1].text,
+ None)
+ self.assertEquals(root[1].get("value"),
+ 'A')
+ self.assertEquals(root[2].tag,
+ 'CALLED')
+ self.assertEquals(root[3].tag,
+ '{local}entry')
+ self.assertEquals(root[3].text,
+ None)
+ self.assertEquals(root[3].get("value"),
+ 'B')
+
def test_xslt_move_result(self):
root = etree.XML('''\
From scoder at codespeak.net Wed Dec 5 19:31:51 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Wed, 5 Dec 2007 19:31:51 +0100 (CET)
Subject: [Lxml-checkins] r49402 - lxml/trunk/src/lxml
Message-ID: <20071205183151.8060D1684CD@codespeak.net>
Author: scoder
Date: Wed Dec 5 19:31:51 2007
New Revision: 49402
Modified:
lxml/trunk/src/lxml/parsertarget.pxi
Log:
fix: memory leak in target parser
Modified: lxml/trunk/src/lxml/parsertarget.pxi
==============================================================================
--- lxml/trunk/src/lxml/parsertarget.pxi (original)
+++ lxml/trunk/src/lxml/parsertarget.pxi Wed Dec 5 19:31:51 2007
@@ -115,5 +115,14 @@
cdef xmlDoc* _handleParseResultDoc(self, _BaseParser parser,
xmlDoc* result, filename) except NULL:
+ if result is not NULL and result._private is NULL:
+ # no _Document proxy => orphan
+ tree.xmlFreeDoc(result)
+ if self._c_ctxt.myDoc is not NULL and \
+ self._c_ctxt.myDoc is not result and \
+ self._c_ctxt.myDoc._private is NULL:
+ # no _Document proxy => orphan
+ tree.xmlFreeDoc(self._c_ctxt.myDoc)
+ self._c_ctxt.myDoc = NULL
self._raise_if_stored()
raise _TargetParserResult(self._python_target.close())
From scoder at codespeak.net Wed Dec 5 19:32:16 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Wed, 5 Dec 2007 19:32:16 +0100 (CET)
Subject: [Lxml-checkins] r49403 - lxml/trunk
Message-ID: <20071205183216.9A91D1684CE@codespeak.net>
Author: scoder
Date: Wed Dec 5 19:32:16 2007
New Revision: 49403
Modified:
lxml/trunk/CHANGES.txt
Log:
changelog
Modified: lxml/trunk/CHANGES.txt
==============================================================================
--- lxml/trunk/CHANGES.txt (original)
+++ lxml/trunk/CHANGES.txt Wed Dec 5 19:32:16 2007
@@ -2,6 +2,23 @@
lxml changelog
==============
+Under development
+=================
+
+Features added
+--------------
+
+* New properties ``position`` and ``code`` on ParseError exception (as
+ in ET 1.3)
+
+Bugs fixed
+----------
+
+* Minor bugs in XSLT error message formatting.
+
+* Result document memory leak in target parser.
+
+
2.0alpha5 (2007-11-24)
======================
From scoder at codespeak.net Wed Dec 5 19:33:05 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Wed, 5 Dec 2007 19:33:05 +0100 (CET)
Subject: [Lxml-checkins] r49404 - lxml/trunk/src/lxml
Message-ID: <20071205183305.B15B71684CE@codespeak.net>
Author: scoder
Date: Wed Dec 5 19:33:05 2007
New Revision: 49404
Modified:
lxml/trunk/src/lxml/schematron.pxi
Log:
cleanup in schematron code
Modified: lxml/trunk/src/lxml/schematron.pxi
==============================================================================
--- lxml/trunk/src/lxml/schematron.pxi (original)
+++ lxml/trunk/src/lxml/schematron.pxi Wed Dec 5 19:33:05 2007
@@ -81,8 +81,8 @@
cdef char* c_href
cdef schematron.xmlSchematronParserCtxt* parser_ctxt
if not config.ENABLE_SCHEMATRON:
- raise SchematronError, \
- "lxml.etree was compiled without Schematron support."
+ raise SchematronError(
+ "lxml.etree was compiled without Schematron support.")
self._c_schema = NULL
if etree is not None:
doc = _documentOrRaise(etree)
@@ -98,7 +98,7 @@
parser_ctxt = schematron.xmlSchematronNewParserCtxt(_cstr(filename))
c_doc = NULL
else:
- raise SchematronParseError, "No tree or file given"
+ raise SchematronParseError("No tree or file given")
if parser_ctxt is NULL:
python.PyErr_NoMemory()
@@ -107,7 +107,8 @@
schematron.xmlSchematronFreeParserCtxt(parser_ctxt)
if self._c_schema is NULL:
- raise SchematronParseError, "Document is not a valid Schematron schema"
+ raise SchematronParseError(
+ "Document is not a valid Schematron schema")
_Validator.__init__(self)
def __dealloc__(self):
@@ -127,27 +128,28 @@
doc = _documentOrRaise(etree)
root_node = _rootNodeOrRaise(etree)
- self._error_log.connect()
options = schematron.XML_SCHEMATRON_OUT_QUIET
- if tree.LIBXML_VERSION <= 20629:
- # hack to switch off stderr output
- options = options | schematron.XML_SCHEMATRON_OUT_XML
+ #if tree.LIBXML_VERSION <= 20630: # ... and later?
+ # hack to switch off stderr output
+ options = options | schematron.XML_SCHEMATRON_OUT_XML
+
valid_ctxt = schematron.xmlSchematronNewValidCtxt(
self._c_schema, options)
if valid_ctxt is NULL:
- self._error_log.disconnect()
- raise SchematronError, "Failed to create validation context"
+ raise SchematronError("Failed to create validation context")
+ self._error_log.connect()
c_doc = _fakeRootDoc(doc._c_doc, root_node._c_node)
with nogil:
ret = schematron.xmlSchematronValidateDoc(valid_ctxt, c_doc)
_destroyFakeDoc(doc._c_doc, c_doc)
+ self._error_log.disconnect()
schematron.xmlSchematronFreeValidCtxt(valid_ctxt)
- self._error_log.disconnect()
if ret == -1:
- raise SchematronValidateError, "Internal error in Schematron validation"
+ raise SchematronValidateError(
+ "Internal error in Schematron validation")
if ret == 0:
return True
else:
From scoder at codespeak.net Sat Dec 8 16:06:43 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Sat, 8 Dec 2007 16:06:43 +0100 (CET)
Subject: [Lxml-checkins] r49552 - lxml/trunk/src/lxml
Message-ID: <20071208150643.8BB311684C2@codespeak.net>
Author: scoder
Date: Sat Dec 8 16:06:42 2007
New Revision: 49552
Modified:
lxml/trunk/src/lxml/xslt.pxi
Log:
Modified: lxml/trunk/src/lxml/xslt.pxi
==============================================================================
--- lxml/trunk/src/lxml/xslt.pxi (original)
+++ lxml/trunk/src/lxml/xslt.pxi Sat Dec 8 16:06:42 2007
@@ -316,7 +316,6 @@
c_doc._private = NULL # no longer used!
self._c_style = c_style
-
self._context = _XSLTContext(None, extensions, regexp)
def __dealloc__(self):
From scoder at codespeak.net Sat Dec 8 16:06:46 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Sat, 8 Dec 2007 16:06:46 +0100 (CET)
Subject: [Lxml-checkins] r49553 - in lxml/trunk: . doc src/lxml
Message-ID: <20071208150646.1E6831684C1@codespeak.net>
Author: scoder
Date: Sat Dec 8 16:06:45 2007
New Revision: 49553
Modified:
lxml/trunk/CHANGES.txt
lxml/trunk/doc/extensions.txt
lxml/trunk/src/lxml/extensions.pxi
Log:
Modified: lxml/trunk/CHANGES.txt
==============================================================================
--- lxml/trunk/CHANGES.txt (original)
+++ lxml/trunk/CHANGES.txt Sat Dec 8 16:06:45 2007
@@ -18,6 +18,12 @@
* Result document memory leak in target parser.
+Other changes
+-------------
+
+* Second argument to ``lxml.etree.Extension()`` helper is no longer
+ required, third argument is now a keyword-only argument ``ns``.
+
2.0alpha5 (2007-11-24)
======================
Modified: lxml/trunk/doc/extensions.txt
==============================================================================
--- lxml/trunk/doc/extensions.txt (original)
+++ lxml/trunk/doc/extensions.txt Sat Dec 8 16:06:45 2007
@@ -217,20 +217,21 @@
>>> ext_module = MyExt()
>>> functions = ('function1', 'function2')
- >>> extensions = etree.Extension( ext_module, functions, 'local-ns' )
+ >>> extensions = etree.Extension( ext_module, functions, ns='local-ns' )
>>> e = etree.XPathEvaluator(doc, namespaces=namespaces, extensions=extensions)
>>> print e.evaluate('l:function1(string(b))')
1Haegar
-The second argument to ``Extension`` can either be be a sequence of names to
-select from the module, a dictionary that explicitly maps function names to
-their XPath alter-ego or ``None`` (explicitly passed) to take all available
-functions under their original name (if their name does not start with '_').
-
-The third argument takes a namespace URI or ``None`` (also if left out) for
-the default namespace. The following examples will therefore all do the same
-thing::
+The optional second argument to ``Extension`` can either be a
+sequence of names to select from the module, a dictionary that
+explicitly maps function names to their XPath alter-ego or ``None``
+(explicitly passed) to take all available functions under their
+original name (if their name does not start with '_').
+
+The additional ``ns`` keyword argument takes a namespace URI or
+``None`` (also if left out) for the default namespace. The following
+examples will therefore all do the same thing::
>>> functions = ('function1', 'function2', 'function3')
>>> extensions = etree.Extension( ext_module, functions )
@@ -238,12 +239,12 @@
>>> print e.evaluate('function1(function2(function3(string(b))))')
123Haegar
- >>> extensions = etree.Extension( ext_module, functions, None )
+ >>> extensions = etree.Extension( ext_module, functions, ns=None )
>>> e = etree.XPathEvaluator(doc, extensions=extensions)
>>> print e.evaluate('function1(function2(function3(string(b))))')
123Haegar
- >>> extensions = etree.Extension( ext_module, None )
+ >>> extensions = etree.Extension(ext_module)
>>> e = etree.XPathEvaluator(doc, extensions=extensions)
>>> print e.evaluate('function1(function2(function3(string(b))))')
123Haegar
@@ -253,15 +254,15 @@
... 'function2' : 'function2',
... 'function3' : 'function3'
... }
- >>> extensions = etree.Extension( ext_module, functions )
+ >>> extensions = etree.Extension(ext_module, functions)
>>> e = etree.XPathEvaluator(doc, extensions=extensions)
>>> print e.evaluate('function1(function2(function3(string(b))))')
123Haegar
For convenience, you can also pass a sequence of extensions::
- >>> extensions1 = etree.Extension( ext_module, None )
- >>> extensions2 = etree.Extension( ext_module, None, 'local-ns' )
+ >>> extensions1 = etree.Extension(ext_module)
+ >>> extensions2 = etree.Extension(ext_module, ns='local-ns')
>>> e = etree.XPathEvaluator(doc, extensions=[extensions1, extensions2],
... namespaces=namespaces)
>>> print e.evaluate('function1(l:function2(function3(string(b))))')
Modified: lxml/trunk/src/lxml/extensions.pxi
==============================================================================
--- lxml/trunk/src/lxml/extensions.pxi (original)
+++ lxml/trunk/src/lxml/extensions.pxi Sat Dec 8 16:06:45 2007
@@ -323,8 +323,17 @@
#print "Holding document:", element._doc._c_doc
self._temp_refs.add((<_Element>o)._doc)
+def Extension(module, function_mapping=None, *, ns=None):
+ """Build a dictionary of extension functions from the functions
+ defined in a module or the methods of an object.
+
+ As second argument, you can pass an additional mapping of
+ attribute names to XPath function names, or a list of function
+ names that should be taken.
-def Extension(module, function_mapping, ns=None):
+ The ``ns`` keyword argument accepts a namespace URI for the XPath
+ functions.
+ """
functions = {}
if python.PyDict_Check(function_mapping):
for function_name, xpath_name in function_mapping.items():
@@ -332,16 +341,13 @@
getattr(module, function_name))
else:
if function_mapping is None:
- function_mapping = []
- for name in dir(module):
- if not name.startswith('_'):
- python.PyList_Append(function_mapping, name)
+ function_mapping = [ name for name in dir(module)
+ if not name.startswith('_') ]
for function_name in function_mapping:
python.PyDict_SetItem(functions, (ns, function_name),
getattr(module, function_name))
return functions
-
################################################################################
# EXSLT regexp implementation
From scoder at codespeak.net Sat Dec 8 16:06:50 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Sat, 8 Dec 2007 16:06:50 +0100 (CET)
Subject: [Lxml-checkins] r49554 - lxml/trunk/src/lxml
Message-ID: <20071208150650.4970C16851F@codespeak.net>
Author: scoder
Date: Sat Dec 8 16:06:49 2007
New Revision: 49554
Modified:
lxml/trunk/src/lxml/extensions.pxi
Log:
Modified: lxml/trunk/src/lxml/extensions.pxi
==============================================================================
--- lxml/trunk/src/lxml/extensions.pxi (original)
+++ lxml/trunk/src/lxml/extensions.pxi Sat Dec 8 16:06:49 2007
@@ -81,12 +81,14 @@
ns_uri_utf = self._to_utf(ns_uri)
python.PyList_Append(ns, (prefix_utf, ns_uri_utf))
namespaces = ns
+ else:
+ namespaces = None
self._doc = None
self._exc = _ExceptionContext()
self._extensions = extensions
self._namespaces = namespaces
- self._temp_refs = _TempStore()
+ self._temp_refs = _TempStore()
if enable_regexp:
_regexp = _ExsltRegExp()
From scoder at codespeak.net Sat Dec 8 16:06:53 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Sat, 8 Dec 2007 16:06:53 +0100 (CET)
Subject: [Lxml-checkins] r49555 - lxml/trunk/src/lxml
Message-ID: <20071208150653.D3D1C168523@codespeak.net>
Author: scoder
Date: Sat Dec 8 16:06:53 2007
New Revision: 49555
Modified:
lxml/trunk/src/lxml/lxml.etree.pyx
Log:
Modified: lxml/trunk/src/lxml/lxml.etree.pyx
==============================================================================
--- lxml/trunk/src/lxml/lxml.etree.pyx (original)
+++ lxml/trunk/src/lxml/lxml.etree.pyx Sat Dec 8 16:06:53 2007
@@ -1746,8 +1746,12 @@
tree.xmlFree(c_result)
return 1
- def __richcmp__(self, other, int op):
- return python.PyObject_RichCompare(dict(self), other, op)
+ def __richcmp__(one, other, int op):
+ if not python.PyDict_Check(one):
+ one = dict(one)
+ if not python.PyDict_Check(other):
+ other = dict(other)
+ return python.PyObject_RichCompare(one, other, op)
cdef class _AttribIterator:
"""Attribute iterator - for internal use only!
From scoder at codespeak.net Sat Dec 8 16:06:55 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Sat, 8 Dec 2007 16:06:55 +0100 (CET)
Subject: [Lxml-checkins] r49556 - lxml/trunk/src/lxml
Message-ID: <20071208150655.06DD3168541@codespeak.net>
Author: scoder
Date: Sat Dec 8 16:06:55 2007
New Revision: 49556
Modified:
lxml/trunk/src/lxml/lxml.etree.pyx
Log:
Modified: lxml/trunk/src/lxml/lxml.etree.pyx
==============================================================================
--- lxml/trunk/src/lxml/lxml.etree.pyx (original)
+++ lxml/trunk/src/lxml/lxml.etree.pyx Sat Dec 8 16:06:55 2007
@@ -215,6 +215,14 @@
cdef class QName:
"""QName wrapper.
+
+ Pass a tag name by itself or a namespace URI and a tag name to
+ create a qualified name. The ``text`` property holds the
+ qualified name in ``{namespace}tagname`` notation.
+
+ You can pass QName objects wherever a tag name is expected. Also,
+ setting Element text from a QName will resolve the namespace
+ prefix and set a qualified text value.
"""
cdef readonly object text
def __init__(self, text_or_uri, tag=None):
From scoder at codespeak.net Sat Dec 8 16:06:59 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Sat, 8 Dec 2007 16:06:59 +0100 (CET)
Subject: [Lxml-checkins] r49557 - lxml/trunk/src/lxml
Message-ID: <20071208150659.57AEA1684C2@codespeak.net>
Author: scoder
Date: Sat Dec 8 16:06:58 2007
New Revision: 49557
Modified:
lxml/trunk/src/lxml/parser.pxi
Log:
Modified: lxml/trunk/src/lxml/parser.pxi
==============================================================================
--- lxml/trunk/src/lxml/parser.pxi (original)
+++ lxml/trunk/src/lxml/parser.pxi Sat Dec 8 16:06:58 2007
@@ -1284,6 +1284,10 @@
############################################################
cdef class TreeBuilder(_SaxParserTarget):
+ """Parser target that builds a tree.
+
+ The final tree is returned by the ``close()`` method.
+ """
cdef _BaseParser _parser
cdef object _factory
cdef object _data
From scoder at codespeak.net Sat Dec 8 16:07:02 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Sat, 8 Dec 2007 16:07:02 +0100 (CET)
Subject: [Lxml-checkins] r49558 - lxml/trunk/doc
Message-ID: <20071208150702.618C01684D7@codespeak.net>
Author: scoder
Date: Sat Dec 8 16:07:01 2007
New Revision: 49558
Modified:
lxml/trunk/doc/compatibility.txt
Log:
Modified: lxml/trunk/doc/compatibility.txt
==============================================================================
--- lxml/trunk/doc/compatibility.txt (original)
+++ lxml/trunk/doc/compatibility.txt Sat Dec 8 16:07:01 2007
@@ -3,10 +3,10 @@
=============================
A lot of care has been taken to ensure compatibility between etree and
-ElementTree. Nonetheless some differences and incompatibilities exist:
+ElementTree. Nonetheless, some differences and incompatibilities exist:
-* Importing etree is obviously different; etree uses a lower case
- package name, while ElementTree a combination of upper-case and
+* Importing etree is obviously different; etree uses a lower-case
+ package name, while ElementTree uses a combination of upper-case and
lower case in imports::
# etree
@@ -89,9 +89,10 @@
API. In general, etree tries to avoid AssertionErrors in favour of being
more specific about the reason for the exception.
-* When parsing fails in ``iterparse()``, ElementTree raises a low-level
- ExpatError instead of a SyntaxError as the other parsers. lxml.etree
- follows the other parts of the parser API and raises an (XML)SyntaxError.
+* When parsing fails in ``iterparse()``, ElementTree up to version
+ 1.2.x raises a low-level ``ExpatError`` instead of a ``SyntaxError``
+ as the other parsers do. Both lxml and ElementTree 1.3 raise a
+ ``ParseError`` for parser errors.
* The ``iterparse()`` function in lxml is implemented based on the libxml2
parser and tree generator. This means that modifications of the document
@@ -115,8 +116,17 @@
tries to provide a default setup that is as close to the ElementTree parser
as possible.
-* ElementTree has a bug when serializing an empty Comment (no text argument
- given) to XML, etree serializes this successfully.
+* The ``TreeBuilder`` class of ``lxml.etree`` uses a different
+ signature for the ``start()`` method. It accepts an additional
+ argument ``nsmap`` to propagate the namespace declarations of an
+ element in addition to its own namespace. To ensure compatibility
+ with ElementTree (which does not support this argument), lxml checks
+ if the method accepts 3 arguments before calling it, and otherwise
+ drops the namespace mapping. This should work with most existing
+ ElementTree code, although there may still be conflicting cases.
+
+* ElementTree 1.2 has a bug when serializing an empty Comment (no text
+ argument given) to XML; etree serializes this successfully.
* ElementTree adds whitespace around comments on serialization, lxml does
not. This means that a comment text "text" that ElementTree serializes as
From scoder at codespeak.net Sat Dec 8 16:07:05 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Sat, 8 Dec 2007 16:07:05 +0100 (CET)
Subject: [Lxml-checkins] r49559 - lxml/trunk
Message-ID: <20071208150705.5BCED168542@codespeak.net>
Author: scoder
Date: Sat Dec 8 16:07:04 2007
New Revision: 49559
Modified:
lxml/trunk/INSTALL.txt
Log:
Modified: lxml/trunk/INSTALL.txt
==============================================================================
--- lxml/trunk/INSTALL.txt (original)
+++ lxml/trunk/INSTALL.txt Sat Dec 8 16:07:04 2007
@@ -34,7 +34,7 @@
This has been reported to work on Linux, MacOS-X 10.4 and Windows, as long as
libxml2 and libxslt are properly installed (including development packages,
-i.e. header files etc.).
+i.e. header files, etc.).
Building lxml from sources
@@ -84,3 +84,5 @@
``DYLD_LIBRARY_PATH`` to the directory where fink keeps the libraries.
.. _fink: http://finkproject.org/
+
+A MacPort of lxml is available. Try ``port install py25-lxml``.
From lxml-checkins at codespeak.net Wed Dec 12 03:39:24 2007
From: lxml-checkins at codespeak.net (VIAGRA ® Official Site)
Date: Wed, 12 Dec 2007 03:39:24 +0100 (CET)
Subject: [Lxml-checkins] December 73% OFF
Message-ID: <20071212163926.6712.qmail@mx-ll-58.147.38-21.tttmaxnet.com>
An HTML attachment was scrubbed...
URL: http://codespeak.net/pipermail/lxml-checkins/attachments/20071212/ddc196c8/attachment.htm
From lxml-checkins at codespeak.net Thu Dec 13 19:33:43 2007
From: lxml-checkins at codespeak.net (VIAGRA ® Official Site)
Date: Thu, 13 Dec 2007 19:33:43 +0100 (CET)
Subject: [Lxml-checkins] December 79% OFF
Message-ID: <20071213-43344.4862.qmail@client-201.240.53.123.speedy.net.pe>
An HTML attachment was scrubbed...
URL: http://codespeak.net/pipermail/lxml-checkins/attachments/20071213/df75b6ea/attachment.htm
From scoder at codespeak.net Fri Dec 14 08:42:38 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Fri, 14 Dec 2007 08:42:38 +0100 (CET)
Subject: [Lxml-checkins] r49760 - in lxml/trunk: . src/lxml
Message-ID: <20071214074238.4049216851B@codespeak.net>
Author: scoder
Date: Fri Dec 14 08:42:36 2007
New Revision: 49760
Modified:
lxml/trunk/ (props changed)
lxml/trunk/src/lxml/lxml.etree.pyx
Log:
r3087 at delle: sbehnel | 2007-12-09 17:54:38 +0100
cleanup
Modified: lxml/trunk/src/lxml/lxml.etree.pyx
==============================================================================
--- lxml/trunk/src/lxml/lxml.etree.pyx (original)
+++ lxml/trunk/src/lxml/lxml.etree.pyx Fri Dec 14 08:42:36 2007
@@ -639,7 +639,7 @@
if self._c_node.parent != NULL and not _isElement(self._c_node.parent):
if element._c_node.type != tree.XML_PI_NODE:
if element._c_node.type != tree.XML_COMMENT_NODE:
- raise TypeError, "Only processing instructions and comments can be siblings of the root element"
+ raise TypeError("Only processing instructions and comments can be siblings of the root element")
element.tail = None
_appendSibling(self, element)
@@ -654,7 +654,7 @@
if self._c_node.parent != NULL and not _isElement(self._c_node.parent):
if element._c_node.type != tree.XML_PI_NODE:
if element._c_node.type != tree.XML_COMMENT_NODE:
- raise TypeError, "Only processing instructions and comments can be siblings of the root element"
+ raise TypeError("Only processing instructions and comments can be siblings of the root element")
element.tail = None
_prependSibling(self, element)
From scoder at codespeak.net Fri Dec 14 08:42:43 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Fri, 14 Dec 2007 08:42:43 +0100 (CET)
Subject: [Lxml-checkins] r49761 - in lxml/trunk: . src/lxml src/lxml/tests
Message-ID: <20071214074243.9F74D16851D@codespeak.net>
Author: scoder
Date: Fri Dec 14 08:42:43 2007
New Revision: 49761
Modified:
lxml/trunk/ (props changed)
lxml/trunk/src/lxml/serializer.pxi
lxml/trunk/src/lxml/tests/test_etree.py
lxml/trunk/src/lxml/tests/test_sax.py
Log:
r3088 at delle: sbehnel | 2007-12-09 17:56:04 +0100
on serialisation: add new lines after root-level processing instructions and comments
Modified: lxml/trunk/src/lxml/serializer.pxi
==============================================================================
--- lxml/trunk/src/lxml/serializer.pxi (original)
+++ lxml/trunk/src/lxml/serializer.pxi Fri Dec 14 08:42:43 2007
@@ -236,12 +236,13 @@
# we are at a root node, so add PI and comment siblings
c_sibling = c_node
while c_sibling.prev != NULL and \
- (c_sibling.prev.type == tree.XML_PI_NODE or \
- c_sibling.prev.type == tree.XML_COMMENT_NODE):
+ (c_sibling.prev.type == tree.XML_PI_NODE or \
+ c_sibling.prev.type == tree.XML_COMMENT_NODE):
c_sibling = c_sibling.prev
while c_sibling != c_node:
tree.xmlNodeDumpOutput(c_buffer, c_node.doc, c_sibling, 0,
pretty_print, encoding)
+ tree.xmlOutputBufferWriteString(c_buffer, "\n")
c_sibling = c_sibling.next
cdef void _writeNextSiblings(tree.xmlOutputBuffer* c_buffer, xmlNode* c_node,
@@ -252,8 +253,9 @@
# we are at a root node, so add PI and comment siblings
c_sibling = c_node.next
while c_sibling != NULL and \
- (c_sibling.type == tree.XML_PI_NODE or \
- c_sibling.type == tree.XML_COMMENT_NODE):
+ (c_sibling.type == tree.XML_PI_NODE or \
+ c_sibling.type == tree.XML_COMMENT_NODE):
+ tree.xmlOutputBufferWriteString(c_buffer, "\n")
tree.xmlNodeDumpOutput(c_buffer, c_node.doc, c_sibling, 0,
pretty_print, encoding)
c_sibling = c_sibling.next
Modified: lxml/trunk/src/lxml/tests/test_etree.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_etree.py (original)
+++ lxml/trunk/src/lxml/tests/test_etree.py Fri Dec 14 08:42:43 2007
@@ -284,7 +284,7 @@
tostring = self.etree.tostring
XMLParser = self.etree.XMLParser
- xml = ''
+ xml = '\n\n'
f = StringIO(xml)
tree = parse(f)
Modified: lxml/trunk/src/lxml/tests/test_sax.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_sax.py (original)
+++ lxml/trunk/src/lxml/tests/test_sax.py Fri Dec 14 08:42:43 2007
@@ -44,9 +44,9 @@
xml_out)
def test_etree_sax_pi_root(self):
- tree = self.parse('ab')
+ tree = self.parse('\nab')
xml_out = self._saxify_serialize(tree)
- self.assertEquals('ab',
+ self.assertEquals('\nab',
xml_out)
def test_etree_sax_attributes(self):
From scoder at codespeak.net Fri Dec 14 08:42:53 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Fri, 14 Dec 2007 08:42:53 +0100 (CET)
Subject: [Lxml-checkins] r49762 - in lxml/trunk: . doc src/lxml
src/lxml/tests
Message-ID: <20071214074253.F408416851B@codespeak.net>
Author: scoder
Date: Fri Dec 14 08:42:53 2007
New Revision: 49762
Modified:
lxml/trunk/ (props changed)
lxml/trunk/CHANGES.txt
lxml/trunk/doc/api.txt
lxml/trunk/doc/parsing.txt
lxml/trunk/doc/tutorial.txt
lxml/trunk/selftest.py
lxml/trunk/selftest2.py
lxml/trunk/src/lxml/serializer.pxi
lxml/trunk/src/lxml/tests/test_elementtree.py
lxml/trunk/src/lxml/tests/test_etree.py
lxml/trunk/src/lxml/tests/test_io.py
lxml/trunk/src/lxml/tests/test_sax.py
lxml/trunk/src/lxml/tests/test_xslt.py
Log:
r3089 at delle: sbehnel | 2007-12-09 18:45:51 +0100
append newline at the end of serialised documents
Modified: lxml/trunk/CHANGES.txt
==============================================================================
--- lxml/trunk/CHANGES.txt (original)
+++ lxml/trunk/CHANGES.txt Fri Dec 14 08:42:53 2007
@@ -21,6 +21,10 @@
Other changes
-------------
+* The serialisation of ElementTree objects now appends a newline at
+ the end of the document and also inserts newlines between the
+ top-level processing instructions and comments
+
* Second argument to ``lxml.etree.Extension()`` helper is no longer
required, third argument is now a keyword-only argument ``ns``.
Modified: lxml/trunk/doc/api.txt
==============================================================================
--- lxml/trunk/doc/api.txt (original)
+++ lxml/trunk/doc/api.txt Fri Dec 14 08:42:53 2007
@@ -112,6 +112,11 @@
d
>>> print etree.tostring(tree)
+
+
+ElementTree objects are serialised as complete documents, including
+preceding or trailing processing instructions and comments. Mind also
+the additional line break at the end.
All operations that you run on such an ElementTree (like XPath, XSLT, etc.)
will understand the explicitly chosen root as root node of a document. They
Modified: lxml/trunk/doc/parsing.txt
==============================================================================
--- lxml/trunk/doc/parsing.txt (original)
+++ lxml/trunk/doc/parsing.txt Fri Dec 14 08:42:53 2007
@@ -512,9 +512,9 @@
>>> etree.tounicode(el)
u''
- >>> et = etree.ElementTree(el)
- >>> etree.tounicode(et)
- u''
+ >>> tree = etree.ElementTree(el)
+ >>> etree.tounicode(tree)
+ u'\n'
The result of ``tounicode()`` can be treated like any other Python unicode
string and then passed back into the parsers. However, if you want to save
Modified: lxml/trunk/doc/tutorial.txt
==============================================================================
--- lxml/trunk/doc/tutorial.txt (original)
+++ lxml/trunk/doc/tutorial.txt Fri Dec 14 08:42:53 2007
@@ -396,12 +396,13 @@
The ElementTree class
=====================
-An ``ElementTree`` is mainly a document wrapper around a tree with a root
-node. It provides a couple of methods for parsing, serialisation and general
-document handling. One of the bigger differences is that it serialises as a
-complete document, as opposed to a single ``Element``. This includes top-level
-processing instructions and comments, as well as a DOCTYPE and other DTD
-content in the document::
+An ``ElementTree`` is mainly a document wrapper around a tree with a
+root node. It provides a couple of methods for parsing, serialisation
+and general document handling. One of the bigger differences is that
+it serialises as a complete document, as opposed to a single
+``Element``. This includes top-level processing instructions and
+comments, an additional line break at the end, as well as a DOCTYPE
+and other DTD content in the document::
>>> from StringIO import StringIO
>>> tree = etree.parse(StringIO('''\
@@ -423,6 +424,7 @@
eggs
+
>>> # lxml 1.3.4 and later
>>> print etree.tostring(etree.ElementTree(tree.getroot()))
@@ -432,6 +434,7 @@
eggs
+
>>> # ElementTree and lxml <= 1.3.3
>>> print etree.tostring(tree.getroot())
@@ -492,6 +495,7 @@
>>> print etree.tostring(tree)
data
+
Note that ``parse()`` returns an ElementTree object, not an Element object as
the string parser functions::
@@ -551,10 +555,11 @@
... except StopIteration:
... return ""
- >>> root = etree.parse(DataSource())
+ >>> tree = etree.parse(DataSource())
- >>> print etree.tostring(root)
+ >>> print etree.tostring(tree)
+
The second way is through a feed parser interface, given by the ``feed(data)``
and ``close()`` methods::
Modified: lxml/trunk/selftest.py
==============================================================================
--- lxml/trunk/selftest.py (original)
+++ lxml/trunk/selftest.py Fri Dec 14 08:42:53 2007
@@ -25,6 +25,8 @@
def fix_compatibility(xml_data):
xml_data = re.sub('\s*xmlns:[a-z0-9]+="http://www.w3.org/2001/XInclude"', '', xml_data)
xml_data = xml_data.replace(' />', '/>')
+ if xml_data[-1:] == '\n':
+ xml_data = xml_data[:-1]
return xml_data
def serialize(elem, **options):
Modified: lxml/trunk/selftest2.py
==============================================================================
--- lxml/trunk/selftest2.py (original)
+++ lxml/trunk/selftest2.py Fri Dec 14 08:42:53 2007
@@ -22,7 +22,11 @@
tree.write(file, encoding=encoding)
else:
tree.write(file)
- return file.getvalue().replace(' />', '/>')
+ result = file.getvalue()
+ result = result.replace(' />', '/>')
+ if result[-1:] == '\n':
+ result = result[:-1]
+ return result
def summarize(elem):
return elem.tag
Modified: lxml/trunk/src/lxml/serializer.pxi
==============================================================================
--- lxml/trunk/src/lxml/serializer.pxi (original)
+++ lxml/trunk/src/lxml/serializer.pxi Fri Dec 14 08:42:53 2007
@@ -172,6 +172,7 @@
_writeTail(c_buffer, c_node, encoding, pretty_print)
if write_complete_document:
_writeNextSiblings(c_buffer, c_node, encoding, pretty_print)
+ tree.xmlOutputBufferWriteString(c_buffer, "\n")
cdef void _writeDeclarationToBuffer(tree.xmlOutputBuffer* c_buffer,
char* version, char* encoding) nogil:
Modified: lxml/trunk/src/lxml/tests/test_elementtree.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_elementtree.py (original)
+++ lxml/trunk/src/lxml/tests/test_elementtree.py Fri Dec 14 08:42:53 2007
@@ -734,7 +734,7 @@
tree = ElementTree(element=html)
f = StringIO()
tree.write(f, method="html")
- data = f.getvalue()
+ data = f.getvalue().replace('\n','')
self.assertEquals('html
test
',
data)
@@ -2541,7 +2541,7 @@
tree = ElementTree(element=a)
tree.write(f, encoding='utf-8')
self.assertEquals(u'S?k p? nettet'.encode('UTF-8'),
- f.getvalue())
+ f.getvalue().replace('\n',''))
def test_parse_file_encoding(self):
parse = self.etree.parse
@@ -2574,9 +2574,7 @@
result = f.getvalue()
declaration = ""
self.assertEncodingDeclaration(result,'iso-8859-1')
- result = result.split('?>', 1)[-1]
- if result[0] == '\n':
- result = result[1:]
+ result = result.split('?>', 1)[-1].replace('\n','')
self.assertEquals(u'S?k p? nettet'.encode('iso-8859-1'),
result)
@@ -2629,7 +2627,7 @@
f = StringIO()
tree = ElementTree(element=a)
tree.write(f)
- data = f.getvalue()
+ data = f.getvalue().replace('\n','')
self.assertEquals(
u'S?k p? nettet'.encode('ASCII', 'xmlcharrefreplace'),
data)
Modified: lxml/trunk/src/lxml/tests/test_etree.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_etree.py (original)
+++ lxml/trunk/src/lxml/tests/test_etree.py Fri Dec 14 08:42:53 2007
@@ -268,23 +268,23 @@
f.close()
def test_parse_remove_comments(self):
- parse = self.etree.parse
+ fromstring = self.etree.fromstring
tostring = self.etree.tostring
XMLParser = self.etree.XMLParser
- f = StringIO('')
+ xml = ''
parser = XMLParser(remove_comments=True)
- tree = parse(f, parser)
+ root = fromstring(xml, parser)
self.assertEquals(
'',
- tostring(tree))
+ tostring(root))
def test_parse_remove_pis(self):
parse = self.etree.parse
tostring = self.etree.tostring
XMLParser = self.etree.XMLParser
- xml = '\n\n'
+ xml = '\n\n\n'
f = StringIO(xml)
tree = parse(f)
@@ -292,11 +292,10 @@
xml,
tostring(tree))
- f = StringIO(xml)
parser = XMLParser(remove_pis=True)
tree = parse(f, parser)
self.assertEquals(
- '',
+ '\n',
tostring(tree))
def test_parse_parser_type_error(self):
@@ -1325,13 +1324,13 @@
def test_namespaces_reuse_after_move(self):
ns_href = "http://a.b.c"
- one = self.etree.parse(
- StringIO('' % ns_href))
- baz = one.getroot()[0][0]
-
- two = self.etree.parse(
- StringIO('' % ns_href))
- two.getroot().append(baz)
+ one = self.etree.fromstring(
+ '' % ns_href)
+ baz = one[0][0]
+
+ two = self.etree.fromstring(
+ '' % ns_href)
+ two.append(baz)
del one # make sure the source document is deallocated
self.assertEquals('{%s}baz' % ns_href, baz.tag)
@@ -1811,7 +1810,7 @@
self.assertEquals(docinfo.system_url, None)
self.assertEquals(docinfo.root_name, 'html')
self.assertEquals(docinfo.doctype, '')
-
+
def test_dtd_io(self):
# check that DTDs that go in also go back out
xml = '''\
@@ -1820,10 +1819,10 @@
]>
- test-test\
+ test-test
'''
- root = self.etree.parse(StringIO(xml))
- self.assertEqual(self.etree.tostring(root).replace(" ", ""),
+ tree = self.etree.parse(StringIO(xml))
+ self.assertEqual(self.etree.tostring(tree).replace(" ", ""),
xml.replace(" ", ""))
def test_byte_zero(self):
Modified: lxml/trunk/src/lxml/tests/test_io.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_io.py (original)
+++ lxml/trunk/src/lxml/tests/test_io.py Fri Dec 14 08:42:53 2007
@@ -70,7 +70,8 @@
handle, filename = tempfile.mkstemp(suffix=".xml")
self.tree.write(filename)
try:
- self.assertEqual(open(filename).read(), self.root_str)
+ self.assertEqual(open(filename).read().replace('\n', ''),
+ self.root_str)
finally:
os.close(handle)
os.remove(filename)
Modified: lxml/trunk/src/lxml/tests/test_sax.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_sax.py (original)
+++ lxml/trunk/src/lxml/tests/test_sax.py Fri Dec 14 08:42:53 2007
@@ -44,9 +44,9 @@
xml_out)
def test_etree_sax_pi_root(self):
- tree = self.parse('\nab')
+ tree = self.parse('ab')
xml_out = self._saxify_serialize(tree)
- self.assertEquals('\nab',
+ self.assertEquals('ab',
xml_out)
def test_etree_sax_attributes(self):
@@ -211,7 +211,7 @@
new_tree = self._saxify_unsaxify(tree)
f = StringIO()
new_tree.write(f)
- return f.getvalue()
+ return f.getvalue().replace('\n', '')
def test_suite():
Modified: lxml/trunk/src/lxml/tests/test_xslt.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_xslt.py (original)
+++ lxml/trunk/src/lxml/tests/test_xslt.py Fri Dec 14 08:42:53 2007
@@ -139,12 +139,12 @@
st = etree.XSLT(style)
res = st.apply(tree)
expected = u"""\
-
+\
\uF8D2"""
f = StringIO()
res.write(f, encoding='UTF-16')
- result = unicode(f.getvalue(), 'UTF-16')
+ result = unicode(f.getvalue(), 'UTF-16').replace('\n', '')
self.assertEquals(expected,
result)
From scoder at codespeak.net Fri Dec 14 08:42:58 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Fri, 14 Dec 2007 08:42:58 +0100 (CET)
Subject: [Lxml-checkins] r49763 - in lxml/trunk: . src/lxml
Message-ID: <20071214074258.C0F2916851E@codespeak.net>
Author: scoder
Date: Fri Dec 14 08:42:58 2007
New Revision: 49763
Modified:
lxml/trunk/ (props changed)
lxml/trunk/src/lxml/lxml.etree.pyx
Log:
r3090 at delle: sbehnel | 2007-12-10 00:12:12 +0100
fix for parser memory leak after switching to Cython 0.9.6.8
Modified: lxml/trunk/src/lxml/lxml.etree.pyx
==============================================================================
--- lxml/trunk/src/lxml/lxml.etree.pyx (original)
+++ lxml/trunk/src/lxml/lxml.etree.pyx Fri Dec 14 08:42:58 2007
@@ -2276,7 +2276,7 @@
cdef _Document doc
try:
doc = _parseDocument(source, parser)
- return ElementTree(doc.getroot())
+ return _elementTreeFactory(doc, None)
except _TargetParserResult, result_container:
return result_container.result
From scoder at codespeak.net Fri Dec 14 08:43:02 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Fri, 14 Dec 2007 08:43:02 +0100 (CET)
Subject: [Lxml-checkins] r49764 - in lxml/trunk: . src/lxml
Message-ID: <20071214074302.8C5CB168521@codespeak.net>
Author: scoder
Date: Fri Dec 14 08:43:02 2007
New Revision: 49764
Modified:
lxml/trunk/ (props changed)
lxml/trunk/src/lxml/lxml.etree.pyx
Log:
r3091 at delle: sbehnel | 2007-12-10 00:12:34 +0100
dropped redundant code
Modified: lxml/trunk/src/lxml/lxml.etree.pyx
==============================================================================
--- lxml/trunk/src/lxml/lxml.etree.pyx (original)
+++ lxml/trunk/src/lxml/lxml.etree.pyx Fri Dec 14 08:43:02 2007
@@ -2172,8 +2172,6 @@
(DTD, XInclude, ...).
"""
cdef _Document doc
- if parser is None:
- parser = __GLOBAL_PARSER_CONTEXT.getDefaultParser()
try:
doc = _parseMemoryDocument(text, base_url, parser)
return doc.getroot()
From scoder at codespeak.net Fri Dec 14 08:43:05 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Fri, 14 Dec 2007 08:43:05 +0100 (CET)
Subject: [Lxml-checkins] r49765 - in lxml/trunk: . src/lxml
Message-ID: <20071214074305.DF526168521@codespeak.net>
Author: scoder
Date: Fri Dec 14 08:43:05 2007
New Revision: 49765
Modified:
lxml/trunk/ (props changed)
lxml/trunk/src/lxml/serializer.pxi
Log:
r3092 at delle: sbehnel | 2007-12-12 20:24:55 +0100
only append newlines when pretty printing
Modified: lxml/trunk/src/lxml/serializer.pxi
==============================================================================
--- lxml/trunk/src/lxml/serializer.pxi (original)
+++ lxml/trunk/src/lxml/serializer.pxi Fri Dec 14 08:43:05 2007
@@ -172,7 +172,8 @@
_writeTail(c_buffer, c_node, encoding, pretty_print)
if write_complete_document:
_writeNextSiblings(c_buffer, c_node, encoding, pretty_print)
- tree.xmlOutputBufferWriteString(c_buffer, "\n")
+ if pretty_print:
+ tree.xmlOutputBufferWriteString(c_buffer, "\n")
cdef void _writeDeclarationToBuffer(tree.xmlOutputBuffer* c_buffer,
char* version, char* encoding) nogil:
@@ -243,7 +244,8 @@
while c_sibling != c_node:
tree.xmlNodeDumpOutput(c_buffer, c_node.doc, c_sibling, 0,
pretty_print, encoding)
- tree.xmlOutputBufferWriteString(c_buffer, "\n")
+ if pretty_print:
+ tree.xmlOutputBufferWriteString(c_buffer, "\n")
c_sibling = c_sibling.next
cdef void _writeNextSiblings(tree.xmlOutputBuffer* c_buffer, xmlNode* c_node,
@@ -256,7 +258,8 @@
while c_sibling != NULL and \
(c_sibling.type == tree.XML_PI_NODE or \
c_sibling.type == tree.XML_COMMENT_NODE):
- tree.xmlOutputBufferWriteString(c_buffer, "\n")
+ if pretty_print:
+ tree.xmlOutputBufferWriteString(c_buffer, "\n")
tree.xmlNodeDumpOutput(c_buffer, c_node.doc, c_sibling, 0,
pretty_print, encoding)
c_sibling = c_sibling.next
@@ -407,5 +410,7 @@
c_buffer = tree.xmlOutputBufferCreateFile(python.PyFile_AsFile(f), NULL)
tree.xmlNodeDumpOutput(c_buffer, c_node.doc, c_node, 0, pretty_print, NULL)
_writeTail(c_buffer, c_node, NULL, 0)
- tree.xmlOutputBufferWriteString(c_buffer, '\n')
+ if not pretty_print:
+ # not written yet
+ tree.xmlOutputBufferWriteString(c_buffer, '\n')
tree.xmlOutputBufferFlush(c_buffer)
From scoder at codespeak.net Fri Dec 14 08:47:10 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Fri, 14 Dec 2007 08:47:10 +0100 (CET)
Subject: [Lxml-checkins] r49766 - lxml/trunk
Message-ID: <20071214074710.41A611684E6@codespeak.net>
Author: scoder
Date: Fri Dec 14 08:47:09 2007
New Revision: 49766
Modified:
lxml/trunk/ (props changed)
lxml/trunk/CHANGES.txt
Log:
r3099 at delle: sbehnel | 2007-12-14 08:47:04 +0100
changelog
Modified: lxml/trunk/CHANGES.txt
==============================================================================
--- lxml/trunk/CHANGES.txt (original)
+++ lxml/trunk/CHANGES.txt Fri Dec 14 08:47:09 2007
@@ -14,6 +14,8 @@
Bugs fixed
----------
+* Memory leak in the ``parse()`` function.
+
* Minor bugs in XSLT error message formatting.
* Result document memory leak in target parser.
@@ -21,9 +23,9 @@
Other changes
-------------
-* The serialisation of ElementTree objects now appends a newline at
- the end of the document and also inserts newlines between the
- top-level processing instructions and comments
+* The 'pretty printed' serialisation of ElementTree objects now
+ appends a newline at the end of the document and also inserts
+ newlines between the top-level processing instructions and comments.
* Second argument to ``lxml.etree.Extension()`` helper is no longer
required, third argument is now a keyword-only argument ``ns``.
From ianb at codespeak.net Tue Dec 18 22:04:53 2007
From: ianb at codespeak.net (ianb at codespeak.net)
Date: Tue, 18 Dec 2007 22:04:53 +0100 (CET)
Subject: [Lxml-checkins] r49903 - in lxml/trunk: . src/lxml/html
Message-ID: <20071218210453.933281684F1@codespeak.net>
Author: ianb
Date: Tue Dec 18 22:04:53 2007
New Revision: 49903
Modified:
lxml/trunk/CHANGES.txt
lxml/trunk/src/lxml/html/__init__.py
Log:
Added encoding argument to lxml.html.tostring
Modified: lxml/trunk/CHANGES.txt
==============================================================================
--- lxml/trunk/CHANGES.txt (original)
+++ lxml/trunk/CHANGES.txt Tue Dec 18 22:04:53 2007
@@ -30,6 +30,7 @@
* Second argument to ``lxml.etree.Extension()`` helper is no longer
required, third argument is now a keyword-only argument ``ns``.
+* ``lxml.html.tostring`` takes an ``encoding`` argument.
2.0alpha5 (2007-11-24)
======================
Modified: lxml/trunk/src/lxml/html/__init__.py
==============================================================================
--- lxml/trunk/src/lxml/html/__init__.py (original)
+++ lxml/trunk/src/lxml/html/__init__.py Tue Dec 18 22:04:53 2007
@@ -1259,15 +1259,18 @@
__replace_meta_content_type = re.compile(
r'').sub
-def tostring(doc, pretty_print=False, include_meta_content_type=False):
+def tostring(doc, pretty_print=False, include_meta_content_type=False,
+ encoding=None):
"""
- return HTML string representation of the document given
+ return HTML string representation of the document given
- note: this will create a meta http-equiv="Content" tag in the head
- and may replace any that are present
+ note: if include_meta_content_type is true this will create a meta
+ http-equiv="Content" tag in the head; regardless of the value of include_meta_content_type
+ any existing meta http-equiv="Content" tag will be removed
"""
assert doc is not None
- html = etree.tostring(doc, method="html", pretty_print=pretty_print)
+ html = etree.tostring(doc, method="html", pretty_print=pretty_print,
+ encoding=encoding)
if not include_meta_content_type:
html = __replace_meta_content_type('', html)
return html
From scoder at codespeak.net Tue Dec 18 22:28:39 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Tue, 18 Dec 2007 22:28:39 +0100 (CET)
Subject: [Lxml-checkins] r49904 - in lxml/trunk: . doc
Message-ID: <20071218212839.E4021168443@codespeak.net>
Author: scoder
Date: Tue Dec 18 22:28:37 2007
New Revision: 49904
Modified:
lxml/trunk/ (props changed)
lxml/trunk/doc/validation.txt
Log:
r3101 at delle: sbehnel | 2007-12-18 09:24:09 +0100
small doc update
Modified: lxml/trunk/doc/validation.txt
==============================================================================
--- lxml/trunk/doc/validation.txt (original)
+++ lxml/trunk/doc/validation.txt Tue Dec 18 22:28:37 2007
@@ -232,10 +232,10 @@
Schematron
----------
-Since version 2.0, lxml.etree features Schematron_ support, using the class
-lxml.etree.Schematron. It requires libxml2 2.6.21. The API is the same as
-for the other validators. Pass an ElementTree object to construct a
-Schematron validator::
+Since version 2.0, lxml.etree features Schematron_ support, using the
+class lxml.etree.Schematron. It requires at least libxml2 2.6.21 to
+work. The API is the same as for the other validators. Pass an
+ElementTree object to construct a Schematron validator::
>>> f = StringIO('''\
...
From scoder at codespeak.net Tue Dec 18 22:28:46 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Tue, 18 Dec 2007 22:28:46 +0100 (CET)
Subject: [Lxml-checkins] r49905 - in lxml/trunk: . doc src/lxml/tests
Message-ID: <20071218212846.51E35168448@codespeak.net>
Author: scoder
Date: Tue Dec 18 22:28:45 2007
New Revision: 49905
Modified:
lxml/trunk/ (props changed)
lxml/trunk/doc/api.txt
lxml/trunk/doc/parsing.txt
lxml/trunk/doc/tutorial.txt
lxml/trunk/src/lxml/tests/test_etree.py
Log:
r3102 at delle: sbehnel | 2007-12-18 10:09:44 +0100
fixed test cases after newline-after-document serialisation change
Modified: lxml/trunk/doc/api.txt
==============================================================================
--- lxml/trunk/doc/api.txt (original)
+++ lxml/trunk/doc/api.txt Tue Dec 18 22:28:45 2007
@@ -112,11 +112,9 @@
d
>>> print etree.tostring(tree)
-
ElementTree objects are serialised as complete documents, including
-preceding or trailing processing instructions and comments. Mind also
-the additional line break at the end.
+preceding or trailing processing instructions and comments.
All operations that you run on such an ElementTree (like XPath, XSLT, etc.)
will understand the explicitly chosen root as root node of a document. They
Modified: lxml/trunk/doc/parsing.txt
==============================================================================
--- lxml/trunk/doc/parsing.txt (original)
+++ lxml/trunk/doc/parsing.txt Tue Dec 18 22:28:45 2007
@@ -514,7 +514,7 @@
>>> tree = etree.ElementTree(el)
>>> etree.tounicode(tree)
- u'\n'
+ u''
The result of ``tounicode()`` can be treated like any other Python unicode
string and then passed back into the parsers. However, if you want to save
Modified: lxml/trunk/doc/tutorial.txt
==============================================================================
--- lxml/trunk/doc/tutorial.txt (original)
+++ lxml/trunk/doc/tutorial.txt Tue Dec 18 22:28:45 2007
@@ -401,8 +401,7 @@
and general document handling. One of the bigger differences is that
it serialises as a complete document, as opposed to a single
``Element``. This includes top-level processing instructions and
-comments, an additional line break at the end, as well as a DOCTYPE
-and other DTD content in the document::
+comments, as well as a DOCTYPE and other DTD content in the document::
>>> from StringIO import StringIO
>>> tree = etree.parse(StringIO('''\
@@ -424,7 +423,6 @@
eggs
-
>>> # lxml 1.3.4 and later
>>> print etree.tostring(etree.ElementTree(tree.getroot()))
@@ -434,7 +432,6 @@
eggs
-
>>> # ElementTree and lxml <= 1.3.3
>>> print etree.tostring(tree.getroot())
@@ -495,7 +492,6 @@
>>> print etree.tostring(tree)
data
-
Note that ``parse()`` returns an ElementTree object, not an Element object as
the string parser functions::
@@ -559,7 +555,6 @@
>>> print etree.tostring(tree)
-
The second way is through a feed parser interface, given by the ``feed(data)``
and ``close()`` methods::
Modified: lxml/trunk/src/lxml/tests/test_etree.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_etree.py (original)
+++ lxml/trunk/src/lxml/tests/test_etree.py Tue Dec 18 22:28:45 2007
@@ -284,7 +284,7 @@
tostring = self.etree.tostring
XMLParser = self.etree.XMLParser
- xml = '\n\n\n'
+ xml = ''
f = StringIO(xml)
tree = parse(f)
@@ -295,7 +295,7 @@
parser = XMLParser(remove_pis=True)
tree = parse(f, parser)
self.assertEquals(
- '\n',
+ '',
tostring(tree))
def test_parse_parser_type_error(self):
@@ -1819,7 +1819,7 @@
]>
- test-test
+ test-test\
'''
tree = self.etree.parse(StringIO(xml))
self.assertEqual(self.etree.tostring(tree).replace(" ", ""),
From scoder at codespeak.net Tue Dec 18 22:28:51 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Tue, 18 Dec 2007 22:28:51 +0100 (CET)
Subject: [Lxml-checkins] r49906 - in lxml/trunk: . src/lxml
Message-ID: <20071218212851.1664E168464@codespeak.net>
Author: scoder
Date: Tue Dec 18 22:28:50 2007
New Revision: 49906
Modified:
lxml/trunk/ (props changed)
lxml/trunk/CHANGES.txt
lxml/trunk/src/lxml/lxml.etree.pyx
Log:
r3103 at delle: sbehnel | 2007-12-18 10:11:28 +0100
reverted getiterator() behaviour to returning a real iterator, method is now officially deprecated
Modified: lxml/trunk/CHANGES.txt
==============================================================================
--- lxml/trunk/CHANGES.txt (original)
+++ lxml/trunk/CHANGES.txt Tue Dec 18 22:28:50 2007
@@ -23,6 +23,13 @@
Other changes
-------------
+* The ``getiterator()`` method on Elements and ElementTrees was
+ reverted to return an iterator as it did in lxml 1.x. The ET API
+ specification allows it to return either a sequence or an iterator,
+ and it traditionally returned a sequence in ET and an iterator in
+ lxml. However, it is now deprecated in favour of the ``iter()``
+ method, which should be used in new code wherever possible.
+
* The 'pretty printed' serialisation of ElementTree objects now
appends a newline at the end of the document and also inserts
newlines between the top-level processing instructions and comments
Modified: lxml/trunk/src/lxml/lxml.etree.pyx
==============================================================================
--- lxml/trunk/src/lxml/lxml.etree.pyx (original)
+++ lxml/trunk/src/lxml/lxml.etree.pyx Tue Dec 18 22:28:50 2007
@@ -1099,20 +1099,27 @@
return _elementTreeFactory(self._doc, None)
def getiterator(self, tag=None):
- """Returns a sequence of all elements in the subtree in document order
- (depth first pre-order), starting with this element.
+ """Returns a sequence or iterator of all elements in the subtree in
+ document order (depth first pre-order), starting with this
+ element.
- Can be restricted to find only elements with a specific tag or from a
- namespace.
+ Can be restricted to find only elements with a specific tag
+ (pass ``tag="xyz"``) or from a namespace (pass ``tag="{ns}*"``).
You can also pass the Element, Comment, ProcessingInstruction and
Entity factory functions to look only for the specific element type.
- Note that this method previously returned an iterator, which diverged
- from the original ElementTree behaviour. If you want an efficient
- iterator, use the ``el.iter()`` method instead.
+ Note that this method is deprecated as of ElementTree 1.3 and
+ lxml 2.0. It returns an iterator in lxml, which diverges from
+ the original ElementTree behaviour. If you want an efficient
+ iterator, use the ``element.iter()`` method instead. You
+ should only use this method in new code if you require
+ backwards compatibility with older versions of lxml or
+ ElementTree.
+
+ @deprecated
"""
- return list(ElementDepthFirstIterator(self, tag))
+ return ElementDepthFirstIterator(self, tag)
def iter(self, tag=None):
"""Iterate over all elements in the subtree in document order (depth
@@ -1456,17 +1463,29 @@
return path
def getiterator(self, tag=None):
- """Creates an iterator for the root element. The iterator loops over all elements
- in this tree, in document order.
+ """Returns a sequence or iterator of all elements in document order
+ (depth first pre-order), starting with the root element.
+
+ Can be restricted to find only elements with a specific tag
+ (pass ``tag="xyz"`` or ``tag="{ns}xyz"``) or from a namespace
+ (pass ``tag="{ns}*"``).
- Note that this method is deprecated in favour of the ``el.iter()``
- method. In new code, use it only if you require backwards
- compatibility.
+ You can also pass the Element, Comment, ProcessingInstruction and
+ Entity factory functions to look only for the specific element type.
+
+ Note that this method is deprecated as of ElementTree 1.3 and
+ lxml 2.0. It returns an iterator in lxml, which diverges from
+ the original ElementTree behaviour. If you want an efficient
+ iterator, use the ``tree.iter()`` method instead. You should
+ only use this method in new code if you require backwards
+ compatibility with older versions of lxml or ElementTree.
+
+ @deprecated
"""
root = self.getroot()
if root is None:
return ()
- return root.iter(tag)
+ return root.getiterator(tag)
def iter(self, tag=None):
"""Creates an iterator for the root element. The iterator loops over
@@ -1479,7 +1498,7 @@
def find(self, path):
"""Finds the first toplevel element with given tag. Same as
- getroot().find(path).
+ ``tree.getroot().find(path)``.
"""
self._assertHasRoot()
root = self.getroot()
From scoder at codespeak.net Tue Dec 18 22:28:56 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Tue, 18 Dec 2007 22:28:56 +0100 (CET)
Subject: [Lxml-checkins] r49907 - in lxml/trunk: . doc
Message-ID: <20071218212856.39E651684EF@codespeak.net>
Author: scoder
Date: Tue Dec 18 22:28:55 2007
New Revision: 49907
Modified:
lxml/trunk/ (props changed)
lxml/trunk/CHANGES.txt
lxml/trunk/doc/main.txt
lxml/trunk/version.txt
Log:
r3104 at delle: sbehnel | 2007-12-18 10:16:48 +0100
prepare release of lxml 2.0beta1
Modified: lxml/trunk/CHANGES.txt
==============================================================================
--- lxml/trunk/CHANGES.txt (original)
+++ lxml/trunk/CHANGES.txt Tue Dec 18 22:28:55 2007
@@ -2,8 +2,8 @@
lxml changelog
==============
-Under development
-=================
+2.0beta1 (2007-12-18)
+=====================
Features added
--------------
Modified: lxml/trunk/doc/main.txt
==============================================================================
--- lxml/trunk/doc/main.txt (original)
+++ lxml/trunk/doc/main.txt Tue Dec 18 22:28:55 2007
@@ -138,8 +138,8 @@
.. _`lxml at the Python Package Index`: http://pypi.python.org/pypi/lxml/
.. _`this key`: pubkey.asc
-The latest version is `lxml 2.0alpha5`_, released 2007-11-24
-(`changes for 2.0alpha5`_). `Older versions`_ are listed below.
+The latest version is `lxml 2.0beta1`_, released 2007-12-18
+(`changes for 2.0beta1`_). `Older versions`_ are listed below.
.. _`Older versions`: #old-versions
@@ -199,6 +199,8 @@
Old Versions
------------
+* `lxml 2.0alpha5`_, released 2007-11-24 (`changes for 2.0alpha5`_)
+
* `lxml 2.0alpha4`_, released 2007-10-07 (`changes for 2.0alpha4`_)
* `lxml 2.0alpha3`_, released 2007-09-26 (`changes for 2.0alpha3`_)
@@ -257,6 +259,7 @@
* `lxml 0.5`_, released 2005-04-08
+.. _`lxml 2.0beta1`: lxml-2.0beta1.tgz
.. _`lxml 2.0alpha5`: lxml-2.0alpha5.tgz
.. _`lxml 2.0alpha4`: lxml-2.0alpha4.tgz
.. _`lxml 2.0alpha3`: lxml-2.0alpha3.tgz
@@ -287,6 +290,7 @@
.. _`lxml 0.5.1`: lxml-0.5.1.tgz
.. _`lxml 0.5`: lxml-0.5.tgz
+.. _`changes for 2.0beta1`: changes-2.0beta1.html
.. _`changes for 2.0alpha5`: changes-2.0alpha5.html
.. _`changes for 2.0alpha4`: changes-2.0alpha4.html
.. _`changes for 2.0alpha3`: changes-2.0alpha3.html
Modified: lxml/trunk/version.txt
==============================================================================
--- lxml/trunk/version.txt (original)
+++ lxml/trunk/version.txt Tue Dec 18 22:28:55 2007
@@ -1 +1 @@
-2.0alpha5
+2.0beta1
From scoder at codespeak.net Tue Dec 18 22:28:59 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Tue, 18 Dec 2007 22:28:59 +0100 (CET)
Subject: [Lxml-checkins] r49908 - in lxml/trunk: . src/lxml
Message-ID: <20071218212859.131661684F2@codespeak.net>
Author: scoder
Date: Tue Dec 18 22:28:59 2007
New Revision: 49908
Modified:
lxml/trunk/ (props changed)
lxml/trunk/src/lxml/lxml.etree.pyx
Log:
r3105 at delle: sbehnel | 2007-12-18 11:17:22 +0100
mark getchildren() as deprecated
Modified: lxml/trunk/src/lxml/lxml.etree.pyx
==============================================================================
--- lxml/trunk/src/lxml/lxml.etree.pyx (original)
+++ lxml/trunk/src/lxml/lxml.etree.pyx Tue Dec 18 22:28:59 2007
@@ -1109,15 +1109,13 @@
You can also pass the Element, Comment, ProcessingInstruction and
Entity factory functions to look only for the specific element type.
- Note that this method is deprecated as of ElementTree 1.3 and
- lxml 2.0. It returns an iterator in lxml, which diverges from
- the original ElementTree behaviour. If you want an efficient
- iterator, use the ``element.iter()`` method instead. You
- should only use this method in new code if you require
- backwards compatibility with older versions of lxml or
- ElementTree.
-
- @deprecated
+ @deprecated: Note that this method is deprecated as of
+ ElementTree 1.3 and lxml 2.0. It returns an iterator in lxml,
+ which diverges from the original ElementTree behaviour. If
+ you want an efficient iterator, use the ``element.iter()``
+ method instead. You should only use this method in new code
+ if you require backwards compatibility with older versions of
+ lxml or ElementTree.
"""
return ElementDepthFirstIterator(self, tag)
@@ -1473,14 +1471,13 @@
You can also pass the Element, Comment, ProcessingInstruction and
Entity factory functions to look only for the specific element type.
- Note that this method is deprecated as of ElementTree 1.3 and
- lxml 2.0. It returns an iterator in lxml, which diverges from
- the original ElementTree behaviour. If you want an efficient
- iterator, use the ``tree.iter()`` method instead. You should
- only use this method in new code if you require backwards
- compatibility with older versions of lxml or ElementTree.
-
- @deprecated
+ @deprecated: Note that this method is deprecated as of
+ ElementTree 1.3 and lxml 2.0. It returns an iterator in lxml,
+ which diverges from the original ElementTree behaviour. If
+ you want an efficient iterator, use the ``tree.iter()`` method
+ instead. You should only use this method in new code if you
+ require backwards compatibility with older versions of lxml or
+ ElementTree.
"""
root = self.getroot()
if root is None:
From scoder at codespeak.net Tue Dec 18 22:29:03 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Tue, 18 Dec 2007 22:29:03 +0100 (CET)
Subject: [Lxml-checkins] r49909 - in lxml/trunk: . doc
Message-ID: <20071218212903.9389D1684EF@codespeak.net>
Author: scoder
Date: Tue Dec 18 22:29:03 2007
New Revision: 49909
Modified:
lxml/trunk/ (props changed)
lxml/trunk/doc/api.txt
Log:
r3106 at delle: sbehnel | 2007-12-18 18:52:41 +0100
cleanup in API docs
Modified: lxml/trunk/doc/api.txt
==============================================================================
--- lxml/trunk/doc/api.txt (original)
+++ lxml/trunk/doc/api.txt Tue Dec 18 22:29:03 2007
@@ -141,45 +141,48 @@
The ElementTree API makes Elements iterable to support iteration over their
children. Using the tree defined above, we get::
- >>> [ el.tag for el in root ]
+ >>> [ child.tag for child in root ]
['a', 'b', 'c', 'd']
-Tree traversal is commonly based on the ``element.getiterator()`` method::
+To iterate in the opposite direction, use the ``reversed()`` function
+that exists in Python 2.4 and later.
- >>> [ el.tag for el in root.getiterator() ]
+Tree traversal should use the ``element.iter()`` method::
+
+ >>> [ el.tag for el in root.iter() ]
['root', 'a', 'b', 'c', 'd', 'e']
lxml.etree also supports this, but additionally features an extended API for
iteration over the children, following/preceding siblings, ancestors and
descendants of an element, as defined by the respective XPath axis::
- >>> [ el.tag for el in root.iterchildren() ]
+ >>> [ child.tag for child in root.iterchildren() ]
['a', 'b', 'c', 'd']
- >>> [ el.tag for el in root.iterchildren(reversed=True) ]
+ >>> [ child.tag for child in root.iterchildren(reversed=True) ]
['d', 'c', 'b', 'a']
- >>> [ el.tag for el in b.itersiblings() ]
+ >>> [ sibling.tag for sibling in b.itersiblings() ]
['c', 'd']
- >>> [ el.tag for el in c.itersiblings(preceding=True) ]
+ >>> [ sibling.tag for sibling in c.itersiblings(preceding=True) ]
['b', 'a']
- >>> [ el.tag for el in e.iterancestors() ]
+ >>> [ ancestor.tag for ancestor in e.iterancestors() ]
['d', 'root']
>>> [ el.tag for el in root.iterdescendants() ]
['a', 'b', 'c', 'd', 'e']
-Note how ``element.iterdescendants()`` does not include the element itself, as
-opposed to ``element.getiterator()``. The latter effectively implements the
-'descendant-or-self' axis in XPath.
+Note how ``element.iterdescendants()`` does not include the element
+itself, as opposed to ``element.iter()``. The latter effectively
+implements the 'descendant-or-self' axis in XPath.
All of these iterators support an additional ``tag`` keyword argument that
filters the generated elements by tag name::
- >>> [ el.tag for el in root.iterchildren(tag='a') ]
+ >>> [ child.tag for child in root.iterchildren(tag='a') ]
['a']
- >>> [ el.tag for el in d.iterchildren(tag='a') ]
+ >>> [ child.tag for child in d.iterchildren(tag='a') ]
[]
>>> [ el.tag for el in root.iterdescendants(tag='d') ]
['d']
- >>> [ el.tag for el in root.getiterator(tag='d') ]
+ >>> [ el.tag for el in root.iter(tag='d') ]
['d']
See also the section on the utility functions ``iterparse()`` and
From scoder at codespeak.net Tue Dec 18 22:29:10 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Tue, 18 Dec 2007 22:29:10 +0100 (CET)
Subject: [Lxml-checkins] r49910 - in lxml/trunk: . doc src/lxml
src/lxml/tests
Message-ID: <20071218212910.418DB168464@codespeak.net>
Author: scoder
Date: Tue Dec 18 22:29:09 2007
New Revision: 49910
Modified:
lxml/trunk/ (props changed)
lxml/trunk/CHANGES.txt
lxml/trunk/doc/api.txt
lxml/trunk/doc/elementsoup.txt
lxml/trunk/doc/parsing.txt
lxml/trunk/doc/tutorial.txt
lxml/trunk/src/lxml/serializer.pxi
lxml/trunk/src/lxml/tests/test_etree.py
lxml/trunk/src/lxml/tests/test_htmlparser.py
Log:
r3107 at delle: sbehnel | 2007-12-18 19:01:53 +0100
always append a newline when pretty printing on serialisation (not only for ElementTrees)
Modified: lxml/trunk/CHANGES.txt
==============================================================================
--- lxml/trunk/CHANGES.txt (original)
+++ lxml/trunk/CHANGES.txt Tue Dec 18 22:29:09 2007
@@ -31,8 +31,10 @@
method, which should be used in new code wherever possible.
* The 'pretty printed' serialisation of ElementTree objects now
- appends a newline at the end of the document and also inserts
- newlines between the top-level processing instructions and comments
+ inserts newlines at the root level between processing instructions,
+ comments and the root tag.
+
+* A 'pretty printed' serialisation is now terminated with a newline.
* Second argument to ``lxml.etree.Extension()`` helper is no longer
required, third argument is now a keyword-only argument ``ns``.
Modified: lxml/trunk/doc/api.txt
==============================================================================
--- lxml/trunk/doc/api.txt (original)
+++ lxml/trunk/doc/api.txt Tue Dec 18 22:29:09 2007
@@ -266,11 +266,14 @@
>>> print etree.tostring(root)
- >>> print etree.tostring(root, pretty_print=True)
+ >>> print etree.tostring(root, pretty_print=True),
+Note the newline that is appended at the end when pretty printing the
+output. It was added in lxml 2.0.
+
By default, lxml (just as ElementTree) outputs the XML declaration only if it
is required by the standard::
Modified: lxml/trunk/doc/elementsoup.txt
==============================================================================
--- lxml/trunk/doc/elementsoup.txt (original)
+++ lxml/trunk/doc/elementsoup.txt Tue Dec 18 22:29:09 2007
@@ -30,7 +30,7 @@
To see what we have here, you can serialise it::
>>> from lxml.etree import tostring
- >>> print tostring(root, pretty_print=True)
+ >>> print tostring(root, pretty_print=True),
Modified: lxml/trunk/doc/parsing.txt
==============================================================================
--- lxml/trunk/doc/parsing.txt (original)
+++ lxml/trunk/doc/parsing.txt Tue Dec 18 22:29:09 2007
@@ -139,7 +139,7 @@
>>> parser = etree.HTMLParser()
>>> tree = etree.parse(StringIO(broken_html), parser)
- >>> print etree.tostring(tree.getroot(), pretty_print=True)
+ >>> print etree.tostring(tree.getroot(), pretty_print=True),
test
@@ -153,7 +153,7 @@
ElementTree::
>>> html = etree.HTML(broken_html)
- >>> print etree.tostring(html, pretty_print=True)
+ >>> print etree.tostring(html, pretty_print=True),
test
Modified: lxml/trunk/doc/tutorial.txt
==============================================================================
--- lxml/trunk/doc/tutorial.txt (original)
+++ lxml/trunk/doc/tutorial.txt Tue Dec 18 22:29:09 2007
@@ -104,7 +104,7 @@
To see that this is really XML, you can serialise the tree you have created::
- >>> print etree.tostring(root, pretty_print=True)
+ >>> print etree.tostring(root, pretty_print=True),
@@ -306,7 +306,7 @@
>>> etree.SubElement(root, "child").text = "Child 2"
>>> etree.SubElement(root, "another").text = "Child 3"
- >>> print etree.tostring(root, pretty_print=True)
+ >>> print etree.tostring(root, pretty_print=True),
Child 1
Child 2
@@ -337,10 +337,10 @@
Serialisation
-------------
-Serialisation commonly uses with the ``tostring()`` function that
-returns a string, or the ``ElementTree.write()`` method that writes to
-a file or file-like object. Both accept the same keyword arguments
-like ``pretty_print`` for formatted output or ``encoding`` to select a
+Serialisation commonly uses the ``tostring()`` function that returns a
+string, or the ``ElementTree.write()`` method that writes to a file or
+file-like object. Both accept the same keyword arguments like
+``pretty_print`` for formatted output or ``encoding`` to select a
specific output encoding other than plain ASCII::
>>> root = etree.XML('')
@@ -356,13 +356,16 @@
- >>> print etree.tostring(root, pretty_print=True)
+ >>> print etree.tostring(root, pretty_print=True),
+Note the newline that is appended at the end when pretty printing the
+output.
+
Since lxml 2.0 (and ElementTree 1.3), the serialisation functions can
do more than XML serialisation. You can serialise to HTML or extract
the text content by passing the ``method`` keyword::
@@ -378,7 +381,7 @@
>>> print etree.tostring(root, method='html')
Hello
World
- >>> print etree.tostring(root, method='html', pretty_print=True)
+ >>> print etree.tostring(root, method='html', pretty_print=True),
Hello
World
@@ -657,7 +660,7 @@
>>> body = etree.SubElement(xhtml, "{http://www.w3.org/1999/xhtml}body")
>>> body.text = "Hello World"
- >>> print etree.tostring(xhtml, pretty_print=True)
+ >>> print etree.tostring(xhtml, pretty_print=True),
Hello World
@@ -680,7 +683,7 @@
>>> body = etree.SubElement(xhtml, XHTML + "body")
>>> body.text = "Hello World"
- >>> print etree.tostring(xhtml, pretty_print=True)
+ >>> print etree.tostring(xhtml, pretty_print=True),
Hello World
@@ -689,7 +692,7 @@
>>> body.set(XHTML + "bgcolor", "#CCFFAA")
- >>> print etree.tostring(xhtml, pretty_print=True)
+ >>> print etree.tostring(xhtml, pretty_print=True),
Hello World
@@ -736,7 +739,7 @@
... )
... )
- >>> print etree.tostring(page, pretty_print=True)
+ >>> print etree.tostring(page, pretty_print=True),
This is a sample document
@@ -777,7 +780,7 @@
... )
... )
- >>> print etree.tostring(my_doc, pretty_print=True)
+ >>> print etree.tostring(my_doc, pretty_print=True),
The dog and the hog
Modified: lxml/trunk/src/lxml/serializer.pxi
==============================================================================
--- lxml/trunk/src/lxml/serializer.pxi (original)
+++ lxml/trunk/src/lxml/serializer.pxi Tue Dec 18 22:29:09 2007
@@ -172,8 +172,8 @@
_writeTail(c_buffer, c_node, encoding, pretty_print)
if write_complete_document:
_writeNextSiblings(c_buffer, c_node, encoding, pretty_print)
- if pretty_print:
- tree.xmlOutputBufferWriteString(c_buffer, "\n")
+ if pretty_print:
+ tree.xmlOutputBufferWriteString(c_buffer, "\n")
cdef void _writeDeclarationToBuffer(tree.xmlOutputBuffer* c_buffer,
char* version, char* encoding) nogil:
Modified: lxml/trunk/src/lxml/tests/test_etree.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_etree.py (original)
+++ lxml/trunk/src/lxml/tests/test_etree.py Tue Dec 18 22:29:09 2007
@@ -1904,7 +1904,7 @@
self.assertEquals(result, "")
result = tostring(a, pretty_print=True)
- self.assertEquals(result, "\n \n \n")
+ self.assertEquals(result, "\n \n \n\n")
def test_tostring_method_text_encoding(self):
tostring = self.etree.tostring
@@ -1989,7 +1989,7 @@
self.assertEquals(result, "")
result = tounicode(a, pretty_print=True)
- self.assertEquals(result, "\n \n \n")
+ self.assertEquals(result, "\n \n \n\n")
def _writeElement(self, element, encoding='us-ascii'):
"""Write out element for comparison.
Modified: lxml/trunk/src/lxml/tests/test_htmlparser.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_htmlparser.py (original)
+++ lxml/trunk/src/lxml/tests/test_htmlparser.py Tue Dec 18 22:29:09 2007
@@ -20,7 +20,8 @@
test
page title
-"""
+
+"""
broken_html_str = "testpage title
"
uhtml_str = u"test ??\uF8D2page ??\uF8D2 title
"
From scoder at codespeak.net Tue Dec 18 22:29:13 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Tue, 18 Dec 2007 22:29:13 +0100 (CET)
Subject: [Lxml-checkins] r49911 - in lxml/trunk: . src/lxml
Message-ID: <20071218212913.ED14A1684F0@codespeak.net>
Author: scoder
Date: Tue Dec 18 22:29:13 2007
New Revision: 49911
Modified:
lxml/trunk/ (props changed)
lxml/trunk/src/lxml/lxml.etree.pyx
Log:
r3108 at delle: sbehnel | 2007-12-18 20:38:06 +0100
deprecation of getchildren()
Modified: lxml/trunk/src/lxml/lxml.etree.pyx
==============================================================================
--- lxml/trunk/src/lxml/lxml.etree.pyx (original)
+++ lxml/trunk/src/lxml/lxml.etree.pyx Tue Dec 18 22:29:13 2007
@@ -1020,8 +1020,9 @@
"""Returns all direct children. The elements are returned in document
order.
- Note that this method has been deprecated as of ElementTree 1.3. New
- code should use ``list(element)`` or simply iterate over elements.
+ @deprecated: Note that this method has been deprecated as of
+ ElementTree 1.3 and lxml 2.0. New code should use
+ ``list(element)`` or simply iterate over elements.
"""
return _collectChildren(self)
From scoder at codespeak.net Tue Dec 18 22:29:17 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Tue, 18 Dec 2007 22:29:17 +0100 (CET)
Subject: [Lxml-checkins] r49912 - in lxml/trunk: . src/lxml
Message-ID: <20071218212917.0A4931684F2@codespeak.net>
Author: scoder
Date: Tue Dec 18 22:29:16 2007
New Revision: 49912
Modified:
lxml/trunk/ (props changed)
lxml/trunk/src/lxml/lxml.etree.pyx
Log:
r3109 at delle: sbehnel | 2007-12-18 20:57:43 +0100
made normal case in index() explicit
Modified: lxml/trunk/src/lxml/lxml.etree.pyx
==============================================================================
--- lxml/trunk/src/lxml/lxml.etree.pyx (original)
+++ lxml/trunk/src/lxml/lxml.etree.pyx Tue Dec 18 22:29:16 2007
@@ -934,6 +934,17 @@
if c_child.parent is not self._c_node:
raise ValueError, "Element is not a child of this node."
+ # handle the unbounded search straight away (normal case)
+ if stop is None and (start is None or start == 0):
+ k = 0
+ c_child = c_child.prev
+ while c_child is not NULL:
+ if _isElement(c_child):
+ k = k + 1
+ c_child = c_child.prev
+ return k
+
+ # check indices
if start is None:
c_start = 0
else:
From scoder at codespeak.net Tue Dec 18 22:29:21 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Tue, 18 Dec 2007 22:29:21 +0100 (CET)
Subject: [Lxml-checkins] r49913 - in lxml/trunk: . src/lxml
Message-ID: <20071218212921.71B3B16851D@codespeak.net>
Author: scoder
Date: Tue Dec 18 22:29:20 2007
New Revision: 49913
Modified:
lxml/trunk/ (props changed)
lxml/trunk/src/lxml/lxml.etree.pyx
Log:
r3110 at delle: sbehnel | 2007-12-18 21:05:10 +0100
docstring update
Modified: lxml/trunk/src/lxml/lxml.etree.pyx
==============================================================================
--- lxml/trunk/src/lxml/lxml.etree.pyx (original)
+++ lxml/trunk/src/lxml/lxml.etree.pyx Tue Dec 18 22:29:20 2007
@@ -1135,8 +1135,8 @@
"""Iterate over all elements in the subtree in document order (depth
first pre-order), starting with this element.
- Can be restricted to find only elements with a specific tag or from a
- namespace.
+ Can be restricted to find only elements with a specific tag
+ (pass ``tag="xyz"``) or from a namespace (pass ``tag="{ns}*"``).
You can also pass the Element, Comment, ProcessingInstruction and
Entity factory functions to look only for the specific element type.
From scoder at codespeak.net Tue Dec 18 22:29:24 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Tue, 18 Dec 2007 22:29:24 +0100 (CET)
Subject: [Lxml-checkins] r49914 - in lxml/trunk: . src/lxml/tests
Message-ID: <20071218212924.87781168559@codespeak.net>
Author: scoder
Date: Tue Dec 18 22:29:24 2007
New Revision: 49914
Modified:
lxml/trunk/ (props changed)
lxml/trunk/src/lxml/tests/test_elementtree.py
Log:
r3111 at delle: sbehnel | 2007-12-18 21:06:09 +0100
test case cleanup
Modified: lxml/trunk/src/lxml/tests/test_elementtree.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_elementtree.py (original)
+++ lxml/trunk/src/lxml/tests/test_elementtree.py Tue Dec 18 22:29:24 2007
@@ -19,6 +19,14 @@
getattr(cElementTree, "VERSION", "0.0").split(".")]) <= (1,0,6):
cElementTree = None
+try:
+ reversed
+except NameError:
+ # Python 2.3
+ def reversed(seq):
+ seq = list(seq)[::-1]
+ return seq
+
class ETreeTestCaseBase(unittest.TestCase):
etree = None
@@ -593,13 +601,6 @@
def test_iteration_reversed(self):
XML = self.etree.XML
-
- try:
- reversed(())
- except NameError:
- # before Python 2.4
- return
-
root = XML('TwoHm')
result = []
for el in reversed(root):
@@ -1451,6 +1452,23 @@
'',
b)
+ def test_iter(self):
+ Element = self.etree.Element
+ SubElement = self.etree.SubElement
+
+ a = Element('a')
+ b = SubElement(a, 'b')
+ c = SubElement(a, 'c')
+ d = SubElement(b, 'd')
+ e = SubElement(c, 'e')
+
+ self.assertEquals(
+ [a, b, d, c, e],
+ list(a.iter()))
+ self.assertEquals(
+ [d],
+ list(d.iter()))
+
def test_getiterator(self):
Element = self.etree.Element
SubElement = self.etree.SubElement
From scoder at codespeak.net Wed Dec 19 08:54:18 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Wed, 19 Dec 2007 08:54:18 +0100 (CET)
Subject: [Lxml-checkins] r49916 - lxml/trunk
Message-ID: <20071219075418.EC9481684CA@codespeak.net>
Author: scoder
Date: Wed Dec 19 08:54:17 2007
New Revision: 49916
Modified:
lxml/trunk/ (props changed)
lxml/trunk/CHANGES.txt
Log:
r3125 at delle: sbehnel | 2007-12-18 22:34:05 +0100
cleanup
Modified: lxml/trunk/CHANGES.txt
==============================================================================
--- lxml/trunk/CHANGES.txt (original)
+++ lxml/trunk/CHANGES.txt Wed Dec 19 08:54:17 2007
@@ -41,6 +41,7 @@
* ``lxml.html.tostring`` takes an ``encoding`` argument.
+
2.0alpha5 (2007-11-24)
======================
From scoder at codespeak.net Wed Dec 19 08:54:22 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Wed, 19 Dec 2007 08:54:22 +0100 (CET)
Subject: [Lxml-checkins] r49917 - in lxml/trunk: . src/lxml/tests
Message-ID: <20071219075422.2FC411684E1@codespeak.net>
Author: scoder
Date: Wed Dec 19 08:54:21 2007
New Revision: 49917
Modified:
lxml/trunk/ (props changed)
lxml/trunk/src/lxml/tests/test_objectify.py
Log:
r3126 at delle: sbehnel | 2007-12-18 23:05:03 +0100
fix API usage
Modified: lxml/trunk/src/lxml/tests/test_objectify.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_objectify.py (original)
+++ lxml/trunk/src/lxml/tests/test_objectify.py Wed Dec 19 08:54:21 2007
@@ -1605,9 +1605,11 @@
root = XML(xml)
objectify.annotate(root)
- attribs = root.xpath("//@py:%s" % pytype_name, {"py" : pytype_ns})
+ attribs = root.xpath("//@py:%s" % pytype_name,
+ namespaces={"py" : pytype_ns})
self.assertEquals(0, len(attribs))
- attribs = root.xpath("//@py:test", {"py" : "TEST"})
+ attribs = root.xpath("//@py:test",
+ namespaces={"py" : "TEST"})
self.assertEquals(7, len(attribs))
objectify.setPytypeAttributeTag()
@@ -1617,11 +1619,13 @@
self.assertNotEqual("test", pytype_name.lower())
root = XML(xml)
- attribs = root.xpath("//@py:%s" % pytype_name, {"py" : pytype_ns})
+ attribs = root.xpath("//@py:%s" % pytype_name,
+ namespaces={"py" : pytype_ns})
self.assertEquals(0, len(attribs))
objectify.annotate(root)
- attribs = root.xpath("//@py:%s" % pytype_name, {"py" : pytype_ns})
+ attribs = root.xpath("//@py:%s" % pytype_name,
+ namespaces={"py" : pytype_ns})
self.assertEquals(7, len(attribs))
def test_registered_types(self):
From scoder at codespeak.net Wed Dec 19 08:54:25 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Wed, 19 Dec 2007 08:54:25 +0100 (CET)
Subject: [Lxml-checkins] r49918 - in lxml/trunk: . doc
Message-ID: <20071219075425.F2FF0168508@codespeak.net>
Author: scoder
Date: Wed Dec 19 08:54:25 2007
New Revision: 49918
Modified:
lxml/trunk/ (props changed)
lxml/trunk/doc/extensions.txt
Log:
r3127 at delle: sbehnel | 2007-12-18 23:05:17 +0100
fix API usage
Modified: lxml/trunk/doc/extensions.txt
==============================================================================
--- lxml/trunk/doc/extensions.txt (original)
+++ lxml/trunk/doc/extensions.txt Wed Dec 19 08:54:25 2007
@@ -71,7 +71,7 @@
>>> ns = etree.FunctionNamespace('http://mydomain.org/myfunctions')
>>> ns['hello'] = hello
>>> prefixmap = {'f' : 'http://mydomain.org/myfunctions'}
- >>> print root.xpath('f:hello(local-name(*))', prefixmap)
+ >>> print root.xpath('f:hello(local-name(*))', namespaces=prefixmap)
Hello b
@@ -324,7 +324,7 @@
>>> ns['new-node-set'] = returnsNodeSet
- >>> e = etree.XPathEvaluator(doc, None)
+ >>> e = etree.XPathEvaluator(doc)
>>> r = e.evaluate("new-node-set()/result")
>>> print [ t.text for t in r ]
From scoder at codespeak.net Wed Dec 19 08:54:29 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Wed, 19 Dec 2007 08:54:29 +0100 (CET)
Subject: [Lxml-checkins] r49919 - in lxml/trunk: . doc
Message-ID: <20071219075429.10A63168516@codespeak.net>
Author: scoder
Date: Wed Dec 19 08:54:29 2007
New Revision: 49919
Modified:
lxml/trunk/ (props changed)
lxml/trunk/doc/xpathxslt.txt
Log:
r3128 at delle: sbehnel | 2007-12-18 23:05:32 +0100
fix API usage
Modified: lxml/trunk/doc/xpathxslt.txt
==============================================================================
--- lxml/trunk/doc/xpathxslt.txt (original)
+++ lxml/trunk/doc/xpathxslt.txt Wed Dec 19 08:54:29 2007
@@ -112,8 +112,9 @@
... ''')
>>> doc = etree.parse(f)
- >>> r = doc.xpath('/t:foo/b:bar', {'t': 'http://codespeak.net/ns/test1',
- ... 'b': 'http://codespeak.net/ns/test2'})
+ >>> r = doc.xpath('/t:foo/b:bar',
+ ... namespaces={'t': 'http://codespeak.net/ns/test1',
+ ... 'b': 'http://codespeak.net/ns/test2'})
>>> len(r)
1
>>> r[0].tag
@@ -195,7 +196,7 @@
>>> root = etree.XML("")
- >>> find = etree.XPath("//n:b", {'n':'NS'})
+ >>> find = etree.XPath("//n:b", namespaces={'n':'NS'})
>>> print find(root)[0].tag
{NS}b
@@ -203,7 +204,7 @@
>>> regexpNS = "http://exslt.org/regular-expressions"
>>> find = etree.XPath("//*[re:test(., '^abc$', 'i')]",
- ... {'re':regexpNS})
+ ... namespaces={'re':regexpNS})
>>> root = etree.XML("aBaBc")
>>> print find(root)[0].text
@@ -257,7 +258,7 @@
>>> root = etree.XML("")
- >>> find = etree.XPath("//p:b", {'p' : 'ns'})
+ >>> find = etree.XPath("//p:b", namespaces={'p' : 'ns'})
>>> print find(root)[0].tag
{ns}b
From scoder at codespeak.net Wed Dec 19 08:54:33 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Wed, 19 Dec 2007 08:54:33 +0100 (CET)
Subject: [Lxml-checkins] r49920 - in lxml/trunk: . src/lxml/tests
Message-ID: <20071219075433.0B6571684CA@codespeak.net>
Author: scoder
Date: Wed Dec 19 08:54:33 2007
New Revision: 49920
Modified:
lxml/trunk/ (props changed)
lxml/trunk/src/lxml/tests/test_xpathevaluator.py
Log:
r3129 at delle: sbehnel | 2007-12-18 23:06:04 +0100
fix API usage
Modified: lxml/trunk/src/lxml/tests/test_xpathevaluator.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_xpathevaluator.py (original)
+++ lxml/trunk/src/lxml/tests/test_xpathevaluator.py Wed Dec 19 08:54:33 2007
@@ -97,27 +97,27 @@
root = tree.getroot()
self.assertEquals(
[root[0]],
- tree.xpath('//foo:b', {'foo': 'uri:a'}))
+ tree.xpath('//foo:b', namespaces={'foo': 'uri:a'}))
self.assertEquals(
[],
- tree.xpath('//foo:b', {'foo': 'uri:c'}))
+ tree.xpath('//foo:b', namespaces={'foo': 'uri:c'}))
self.assertEquals(
[root[0]],
- root.xpath('//baz:b', {'baz': 'uri:a'}))
+ root.xpath('//baz:b', namespaces={'baz': 'uri:a'}))
def test_xpath_ns_none(self):
tree = self.parse('')
root = tree.getroot()
self.assertRaises(
TypeError,
- root.xpath, '//b', {None: 'uri:a'})
+ root.xpath, '//b', namespaces={None: 'uri:a'})
def test_xpath_ns_empty(self):
tree = self.parse('')
root = tree.getroot()
self.assertRaises(
TypeError,
- root.xpath, '//b', {'': 'uri:a'})
+ root.xpath, '//b', namespaces={'': 'uri:a'})
def test_xpath_error(self):
tree = self.parse('')
@@ -195,7 +195,7 @@
return 'hello %s' % a
extension = {(None, 'foo'): foo}
tree = self.parse('')
- e = etree.XPathEvaluator(tree, None, [extension])
+ e = etree.XPathEvaluator(tree, extensions=[extension])
self.assertEquals(
"hello you", e.evaluate("foo('you')"))
@@ -212,7 +212,7 @@
return 1/0
extension = {(None, 'foo'): foo}
tree = self.parse('')
- e = etree.XPathEvaluator(tree, None, [extension])
+ e = etree.XPathEvaluator(tree, extensions=[extension])
self.assertRaises(ZeroDivisionError, e.evaluate, "foo('test')")
def test_xpath_extensions_nodes(self):
@@ -225,7 +225,7 @@
return r
x = self.parse('')
- e = etree.XPathEvaluator(x, None, [{(None, 'foo'): f}])
+ e = etree.XPathEvaluator(x, extensions=[{(None, 'foo'): f}])
r = e.evaluate("foo('World')/result")
self.assertEquals(2, len(r))
self.assertEquals('Hoi', r[0].text)
@@ -241,7 +241,7 @@
return r
x = self.parse('')
- e = etree.XPathEvaluator(x, None, [{(None, 'foo'): f}])
+ e = etree.XPathEvaluator(x, extensions=[{(None, 'foo'): f}])
r = e.evaluate("foo(/*)/result")
self.assertEquals(2, len(r))
self.assertEquals('Hoi', r[0].text)
@@ -258,7 +258,7 @@
return r
x = self.parse('Honk')
- e = etree.XPathEvaluator(x, None, [{(None, 'foo'): f}])
+ e = etree.XPathEvaluator(x, extensions=[{(None, 'foo'): f}])
r = e.evaluate("foo(/*)/result")
self.assertEquals(3, len(r))
self.assertEquals('Hoi', r[0].text)
@@ -555,7 +555,7 @@
Test xpath extension functions.
>>> root = SAMPLE_XML
- >>> e = etree.XPathEvaluator(root, None, [extension])
+ >>> e = etree.XPathEvaluator(root, extensions=[extension])
>>> e.evaluate("stringTest('you')")
'Hello you'
>>> e.evaluate(u"stringTest('\xe9lan')")
From scoder at codespeak.net Wed Dec 19 08:54:37 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Wed, 19 Dec 2007 08:54:37 +0100 (CET)
Subject: [Lxml-checkins] r49921 - in lxml/trunk: . src/lxml
Message-ID: <20071219075437.84DC616850A@codespeak.net>
Author: scoder
Date: Wed Dec 19 08:54:36 2007
New Revision: 49921
Modified:
lxml/trunk/ (props changed)
lxml/trunk/src/lxml/xslt.pxi
Log:
r3130 at delle: sbehnel | 2007-12-18 23:06:59 +0100
use keyword-only args in XSLT API
Modified: lxml/trunk/src/lxml/xslt.pxi
==============================================================================
--- lxml/trunk/src/lxml/xslt.pxi (original)
+++ lxml/trunk/src/lxml/xslt.pxi Wed Dec 19 08:54:36 2007
@@ -179,7 +179,7 @@
* write_network
"""
cdef xslt.xsltSecurityPrefs* _prefs
- def __init__(self, read_file=True, write_file=True, create_dir=True,
+ def __init__(self, *, read_file=True, write_file=True, create_dir=True,
read_network=True, write_network=True):
self._prefs = xslt.xsltNewSecurityPrefs()
if self._prefs is NULL:
@@ -269,7 +269,7 @@
cdef XSLTAccessControl _access_control
cdef _ErrorLog _error_log
- def __init__(self, xslt_input, extensions=None, regexp=True,
+ def __init__(self, xslt_input, *, extensions=None, regexp=True,
access_control=None):
cdef xslt.xsltStylesheet* c_style
cdef xmlDoc* c_doc
@@ -329,8 +329,8 @@
def __get__(self):
return self._error_log.copy()
- def apply(self, _input, profile_run=False, **_kw):
- return self(_input, profile_run, **_kw)
+ def apply(self, _input, *, profile_run=False, **_kw):
+ return self(_input, profile_run=profile_run, **_kw)
def tostring(self, _ElementTree result_tree):
"""Save result doc to string based on stylesheet output method.
@@ -360,7 +360,7 @@
return new_xslt
- def __call__(self, _input, profile_run=False, **_kw):
+ def __call__(self, _input, *, profile_run=False, **_kw):
cdef _XSLTContext context
cdef _XSLTResolverContext resolver_context
cdef _Document input_doc
@@ -595,7 +595,7 @@
if __findStylesheetByID is None:
__findStylesheetByID = XPath(
"//xsl:stylesheet[@xml:id = $id]",
- {"xsl" : "http://www.w3.org/1999/XSL/Transform"})
+ namespaces={"xsl" : "http://www.w3.org/1999/XSL/Transform"})
return __findStylesheetByID(doc, id=id)
cdef class _XSLTProcessingInstruction(PIBase):
From scoder at codespeak.net Wed Dec 19 08:54:40 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Wed, 19 Dec 2007 08:54:40 +0100 (CET)
Subject: [Lxml-checkins] r49922 - lxml/trunk
Message-ID: <20071219075440.C214B16850A@codespeak.net>
Author: scoder
Date: Wed Dec 19 08:54:40 2007
New Revision: 49922
Modified:
lxml/trunk/ (props changed)
lxml/trunk/CHANGES.txt
Log:
r3131 at delle: sbehnel | 2007-12-18 23:20:05 +0100
changelog
Modified: lxml/trunk/CHANGES.txt
==============================================================================
--- lxml/trunk/CHANGES.txt (original)
+++ lxml/trunk/CHANGES.txt Wed Dec 19 08:54:40 2007
@@ -23,6 +23,8 @@
Other changes
-------------
+* The XSLT API now requires keyword-only arguments.
+
* The ``getiterator()`` method on Elements and ElementTrees was
reverted to return an iterator as it did in lxml 1.x. The ET API
specification allows it to return either a sequence or an iterator,
From scoder at codespeak.net Wed Dec 19 08:54:44 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Wed, 19 Dec 2007 08:54:44 +0100 (CET)
Subject: [Lxml-checkins] r49923 - in lxml/trunk: . doc
Message-ID: <20071219075444.CF713168508@codespeak.net>
Author: scoder
Date: Wed Dec 19 08:54:44 2007
New Revision: 49923
Modified:
lxml/trunk/ (props changed)
lxml/trunk/CHANGES.txt
lxml/trunk/doc/main.txt
lxml/trunk/version.txt
Log:
r3132 at delle: sbehnel | 2007-12-19 08:54:03 +0100
another alpha (sigh)
Modified: lxml/trunk/CHANGES.txt
==============================================================================
--- lxml/trunk/CHANGES.txt (original)
+++ lxml/trunk/CHANGES.txt Wed Dec 19 08:54:44 2007
@@ -2,8 +2,8 @@
lxml changelog
==============
-2.0beta1 (2007-12-18)
-=====================
+2.0alpha6 (2007-12-19)
+======================
Features added
--------------
@@ -23,7 +23,7 @@
Other changes
-------------
-* The XSLT API now requires keyword-only arguments.
+* Parts of the XSLT API now require keyword-only arguments.
* The ``getiterator()`` method on Elements and ElementTrees was
reverted to return an iterator as it did in lxml 1.x. The ET API
Modified: lxml/trunk/doc/main.txt
==============================================================================
--- lxml/trunk/doc/main.txt (original)
+++ lxml/trunk/doc/main.txt Wed Dec 19 08:54:44 2007
@@ -138,8 +138,8 @@
.. _`lxml at the Python Package Index`: http://pypi.python.org/pypi/lxml/
.. _`this key`: pubkey.asc
-The latest version is `lxml 2.0beta1`_, released 2007-12-18
-(`changes for 2.0beta1`_). `Older versions`_ are listed below.
+The latest version is `lxml 2.0alpha6`_, released 2007-12-19
+(`changes for 2.0alpha6`_). `Older versions`_ are listed below.
.. _`Older versions`: #old-versions
@@ -259,7 +259,7 @@
* `lxml 0.5`_, released 2005-04-08
-.. _`lxml 2.0beta1`: lxml-2.0beta1.tgz
+.. _`lxml 2.0alpha6`: lxml-2.0alpha6.tgz
.. _`lxml 2.0alpha5`: lxml-2.0alpha5.tgz
.. _`lxml 2.0alpha4`: lxml-2.0alpha4.tgz
.. _`lxml 2.0alpha3`: lxml-2.0alpha3.tgz
@@ -290,7 +290,7 @@
.. _`lxml 0.5.1`: lxml-0.5.1.tgz
.. _`lxml 0.5`: lxml-0.5.tgz
-.. _`changes for 2.0beta1`: changes-2.0beta1.html
+.. _`changes for 2.0alpha6`: changes-2.0alpha6.html
.. _`changes for 2.0alpha5`: changes-2.0alpha5.html
.. _`changes for 2.0alpha4`: changes-2.0alpha4.html
.. _`changes for 2.0alpha3`: changes-2.0alpha3.html
Modified: lxml/trunk/version.txt
==============================================================================
--- lxml/trunk/version.txt (original)
+++ lxml/trunk/version.txt Wed Dec 19 08:54:44 2007
@@ -1 +1 @@
-2.0beta1
+2.0alpha6
From scoder at codespeak.net Wed Dec 19 12:02:57 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Wed, 19 Dec 2007 12:02:57 +0100 (CET)
Subject: [Lxml-checkins] r49926 - in lxml/trunk: . src/lxml
Message-ID: <20071219110257.3D59E168514@codespeak.net>
Author: scoder
Date: Wed Dec 19 12:02:56 2007
New Revision: 49926
Modified:
lxml/trunk/ (props changed)
lxml/trunk/src/lxml/lxml.etree.pyx
lxml/trunk/src/lxml/xmlid.pxi
Log:
r3141 at delle: sbehnel | 2007-12-19 09:13:32 +0100
eliminated internal calls to ElementTree()
Modified: lxml/trunk/src/lxml/lxml.etree.pyx
==============================================================================
--- lxml/trunk/src/lxml/lxml.etree.pyx (original)
+++ lxml/trunk/src/lxml/lxml.etree.pyx Wed Dec 19 12:02:56 2007
@@ -1400,13 +1400,13 @@
return self._context_node
def __copy__(self):
- return ElementTree(self._context_node)
+ return _elementTreeFactory(self._doc, self._context_node)
def __deepcopy__(self, memo):
- if self._context_node is None:
- return ElementTree()
- else:
- return ElementTree( self._context_node.__copy__() )
+ cdef _Element root
+ if self._context_node is not None:
+ root = self._context_node.__copy__()
+ return _elementTreeFactory(None, root)
property docinfo:
"""Information about the document provided by parser and DTD. This
Modified: lxml/trunk/src/lxml/xmlid.pxi
==============================================================================
--- lxml/trunk/src/lxml/xmlid.pxi (original)
+++ lxml/trunk/src/lxml/xmlid.pxi Wed Dec 19 12:02:56 2007
@@ -1,3 +1,5 @@
+cdef object _find_id_attributes
+
def XMLID(text):
"""Parse the text and return a tuple (root node, ID dictionary). The root
node is the same as returned by the XML() function. The dictionary
@@ -5,10 +7,14 @@
attributes. The elements referenced by the ID are stored as dictionary
values.
"""
+ global _find_id_attributes
+ if _find_id_attributes is None:
+ _find_id_attributes = XPath('//*[string(@id)]')
+
+ # ElementTree compatible implementation: parse and look for 'id' attributes
root = XML(text)
- # ElementTree compatible implementation: look for 'id' attributes
dic = {}
- for elem in ElementTree(root).xpath('//*[string(@id)]'):
+ for elem in _find_id_attributes(root):
python.PyDict_SetItem(dic, elem.get('id'), elem)
return (root, dic)
@@ -40,7 +46,7 @@
"""
cdef _Document doc
doc = _parseDocument(source, parser)
- return (ElementTree(doc.getroot()), _IDDict(doc))
+ return (_elementTreeFactory(doc, None), _IDDict(doc))
cdef class _IDDict:
"""A dictionary-like proxy class that maps ID attributes to elements.
From scoder at codespeak.net Wed Dec 19 12:13:14 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Wed, 19 Dec 2007 12:13:14 +0100 (CET)
Subject: [Lxml-checkins] r49927 - in lxml/trunk: . src/lxml
Message-ID: <20071219111314.C0BBB168518@codespeak.net>
Author: scoder
Date: Wed Dec 19 12:13:14 2007
New Revision: 49927
Modified:
lxml/trunk/ (props changed)
lxml/trunk/CHANGES.txt
lxml/trunk/src/lxml/lxml.etree.pyx
lxml/trunk/src/lxml/xpath.pxi
Log:
r3143 at delle: sbehnel | 2007-12-19 12:13:06 +0100
proposed keyword-only API for XPath and iteration methods
Modified: lxml/trunk/CHANGES.txt
==============================================================================
--- lxml/trunk/CHANGES.txt (original)
+++ lxml/trunk/CHANGES.txt Wed Dec 19 12:13:14 2007
@@ -23,7 +23,12 @@
Other changes
-------------
-* Parts of the XSLT API now require keyword-only arguments.
+* Various places in the XPath, XSLT and iteration APIs now require
+ keyword-only arguments.
+
+* The argument order in ``element.itersiblings()`` was changed to
+ match the order used in all other iteration methods. The second
+ argument ('preceding') is now a keyword-only argument.
* The ``getiterator()`` method on Elements and ElementTrees was
reverted to return an iterator as it did in lxml 1.x. The ET API
Modified: lxml/trunk/src/lxml/lxml.etree.pyx
==============================================================================
--- lxml/trunk/src/lxml/lxml.etree.pyx (original)
+++ lxml/trunk/src/lxml/lxml.etree.pyx Wed Dec 19 12:13:14 2007
@@ -1065,7 +1065,7 @@
return _elementFactory(self._doc, c_node)
return None
- def itersiblings(self, preceding=False, tag=None):
+ def itersiblings(self, tag=None, *, preceding=False):
"""Iterate over the following or preceding siblings of this element.
The direction is determined by the 'preceding' keyword which defaults
@@ -1073,7 +1073,7 @@
generated elements can be restricted to a specific tag name with the
'tag' keyword.
"""
- return SiblingsIterator(self, preceding, tag)
+ return SiblingsIterator(self, tag, preceding=preceding)
def iterancestors(self, tag=None):
"""Iterate over the ancestors of this element (from parent to parent).
@@ -1090,16 +1090,16 @@
itself. The generated elements can be restricted to a specific tag
name with the 'tag' keyword.
"""
- return ElementDepthFirstIterator(self, tag, False)
+ return ElementDepthFirstIterator(self, tag, inclusive=False)
- def iterchildren(self, reversed=False, tag=None):
+ def iterchildren(self, tag=None, *, reversed=False):
"""Iterate over the children of this element.
As opposed to using normal iteration on this element, the generated
elements can be restricted to a specific tag name with the 'tag'
keyword and reversed with the 'reversed' keyword.
"""
- return ElementChildIterator(self, reversed, tag)
+ return ElementChildIterator(self, tag, reversed=reversed)
def getroottree(self):
"""Return an ElementTree for the root node of the document that
@@ -1143,7 +1143,7 @@
"""
return ElementDepthFirstIterator(self, tag)
- def itertext(self, tag=None, with_tail=True):
+ def itertext(self, tag=None, *, with_tail=True):
"""Iterates over the text content of a subtree.
You can pass the ``tag`` keyword argument to restrict text content to
@@ -1152,7 +1152,7 @@
You can set the ``with_tail`` keyword argument to ``False`` to skip
over tail text.
"""
- return ElementTextIterator(self, tag, with_tail)
+ return ElementTextIterator(self, tag, with_tail=with_tail)
def makeelement(self, _tag, attrib=None, nsmap=None, **_extra):
"""Creates a new element associated with the same document.
@@ -1188,10 +1188,11 @@
path = (path).text
return _elementpath.iterfind(self, path)
- def xpath(self, _path, namespaces=None, extensions=None, **_variables):
+ def xpath(self, _path, *, namespaces=None, extensions=None, **_variables):
"""Evaluate an xpath expression using the element as context node.
"""
- evaluator = XPathElementEvaluator(self, namespaces, extensions)
+ evaluator = XPathElementEvaluator(self, namespaces=namespaces,
+ extensions=extensions)
return evaluator.evaluate(_path, **_variables)
@@ -1545,7 +1546,7 @@
path = "." + path
return root.iterfind(path)
- def xpath(self, _path, namespaces=None, extensions=None, **_variables):
+ def xpath(self, _path, *, namespaces=None, extensions=None, **_variables):
"""XPath evaluate in context of document.
``namespaces`` is an optional dictionary with prefix to namespace URI
@@ -1562,7 +1563,8 @@
XPathEvaluator directly.
"""
self._assertHasRoot()
- evaluator = XPathDocumentEvaluator(self, namespaces, extensions)
+ evaluator = XPathDocumentEvaluator(self, namespaces=namespaces,
+ extensions=extensions)
return evaluator.evaluate(_path, **_variables)
def xslt(self, _xslt, extensions=None, access_control=None, **_kw):
@@ -1892,7 +1894,7 @@
cdef class ElementChildIterator(_ElementIterator):
"Iterates over the children of an element."
- def __init__(self, _Element node not None, reversed=False, tag=None):
+ def __init__(self, _Element node not None, tag=None, *, reversed=False):
cdef xmlNode* c_node
self._initTagMatch(tag)
if reversed:
@@ -1916,7 +1918,7 @@
You can pass the boolean keyword ``preceding`` to specify the direction.
"""
- def __init__(self, _Element node not None, preceding=False, tag=None):
+ def __init__(self, _Element node not None, tag=None, *, preceding=False):
self._initTagMatch(tag)
if preceding:
self._next_element = _previousElement
@@ -1951,7 +1953,7 @@
# keep next node to return and a depth counter in the tree
cdef _Element _next_node
cdef _Element _top_node
- def __init__(self, _Element node not None, tag=None, inclusive=True):
+ def __init__(self, _Element node not None, tag=None, *, inclusive=True):
self._top_node = node
self._next_node = node
self._initTagMatch(tag)
@@ -2009,7 +2011,7 @@
"""
cdef object _nextEvent
cdef _Element _start_element
- def __init__(self, _Element element not None, tag=None, with_tail=True):
+ def __init__(self, _Element element not None, tag=None, *, with_tail=True):
if with_tail:
events = ("start", "end")
else:
Modified: lxml/trunk/src/lxml/xpath.pxi
==============================================================================
--- lxml/trunk/src/lxml/xpath.pxi (original)
+++ lxml/trunk/src/lxml/xpath.pxi Wed Dec 19 12:13:14 2007
@@ -215,7 +215,7 @@
the 'regexp' boolean keyword (defaults to True).
"""
cdef _Element _element
- def __init__(self, _Element element not None, namespaces=None,
+ def __init__(self, _Element element not None, *, namespaces=None,
extensions=None, regexp=True):
cdef xpath.xmlXPathContext* xpathCtxt
cdef int ns_register_status
@@ -280,10 +280,11 @@
keyword argument. EXSLT regular expression support can be disabled with
the 'regexp' boolean keyword (defaults to True).
"""
- def __init__(self, _ElementTree etree not None, namespaces=None,
+ def __init__(self, _ElementTree etree not None, *, namespaces=None,
extensions=None, regexp=True):
XPathElementEvaluator.__init__(
- self, etree._context_node, namespaces, extensions, regexp)
+ self, etree._context_node, namespaces=namespaces,
+ extensions=extensions, regexp=regexp)
def __call__(self, _path, **_variables):
"""Evaluate an XPath expression on the document.
@@ -322,7 +323,7 @@
return result
-def XPathEvaluator(etree_or_element, namespaces=None, extensions=None,
+def XPathEvaluator(etree_or_element, *, namespaces=None, extensions=None,
regexp=True):
"""Creates an XPath evaluator for an ElementTree or an Element.
@@ -334,11 +335,13 @@
the 'regexp' boolean keyword (defaults to True).
"""
if isinstance(etree_or_element, _ElementTree):
- return XPathDocumentEvaluator(etree_or_element, namespaces,
- extensions, regexp)
+ return XPathDocumentEvaluator(
+ etree_or_element, namespaces=namespaces,
+ extensions=extensions, regexp=regexp)
else:
- return XPathElementEvaluator(etree_or_element, namespaces,
- extensions, regexp)
+ return XPathElementEvaluator(
+ etree_or_element, namespaces=namespaces,
+ extensions=extensions, regexp=regexp)
cdef class XPath(_XPathEvaluatorBase):
@@ -353,7 +356,7 @@
cdef xpath.xmlXPathCompExpr* _xpath
cdef readonly object path
- def __init__(self, path, namespaces=None, extensions=None, regexp=True):
+ def __init__(self, path, *, namespaces=None, extensions=None, regexp=True):
cdef xpath.xmlXPathContext* xpathCtxt
_XPathEvaluatorBase.__init__(self, namespaces, extensions, regexp)
self.path = path
@@ -415,9 +418,10 @@
Note that this class does not accept the ``namespaces`` keyword
argument. All namespaces must be passed as part of the path string.
"""
- def __init__(self, path, extensions=None, regexp=True):
+ def __init__(self, path, *, extensions=None, regexp=True):
path, namespaces = self._nsextract_path(path)
- XPath.__init__(self, path, namespaces, extensions, regexp)
+ XPath.__init__(self, path, namespaces=namespaces,
+ extensions=extensions, regexp=regexp)
cdef _nsextract_path(self, path):
# replace {namespaces} by new prefixes
From scoder at codespeak.net Wed Dec 19 12:31:52 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Wed, 19 Dec 2007 12:31:52 +0100 (CET)
Subject: [Lxml-checkins] r49928 - in lxml/trunk: . doc
Message-ID: <20071219113152.9C4BA168514@codespeak.net>
Author: scoder
Date: Wed Dec 19 12:31:50 2007
New Revision: 49928
Modified:
lxml/trunk/ (props changed)
lxml/trunk/doc/build.txt
lxml/trunk/setup.py
Log:
r3145 at delle: sbehnel | 2007-12-19 12:31:40 +0100
require Cython 0.9.6.10
Modified: lxml/trunk/doc/build.txt
==============================================================================
--- lxml/trunk/doc/build.txt (original)
+++ lxml/trunk/doc/build.txt Wed Dec 19 12:31:50 2007
@@ -33,11 +33,11 @@
be an lxml developer, you do need a working Cython installation. You can use
EasyInstall_ to install it::
- easy_install Cython==0.9.6.8
+ easy_install Cython==0.9.6.10
.. _EasyInstall: http://peak.telecommunity.com/DevCenter/EasyInstall
-lxml currently requires at least Cython 0.9.6.8, but later versions
+lxml currently requires at least Cython 0.9.6.10, but later versions
should work.
Modified: lxml/trunk/setup.py
==============================================================================
--- lxml/trunk/setup.py (original)
+++ lxml/trunk/setup.py Wed Dec 19 12:31:50 2007
@@ -16,7 +16,7 @@
except pkg_resources.VersionConflict:
from ez_setup import use_setuptools
use_setuptools(version="0.6c5")
- #pkg_resources.require("Cython==0.9.6.6")
+ #pkg_resources.require("Cython==0.9.6.10")
from setuptools import setup
extra_options["zip_safe"] = False
except ImportError:
From scoder at codespeak.net Wed Dec 19 12:34:34 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Wed, 19 Dec 2007 12:34:34 +0100 (CET)
Subject: [Lxml-checkins] r49929 - in lxml/trunk: . doc
Message-ID: <20071219113434.697EE168514@codespeak.net>
Author: scoder
Date: Wed Dec 19 12:34:33 2007
New Revision: 49929
Modified:
lxml/trunk/ (props changed)
lxml/trunk/doc/mkhtml.py
Log:
r3147 at delle: sbehnel | 2007-12-19 12:34:29 +0100
fix API usage
Modified: lxml/trunk/doc/mkhtml.py
==============================================================================
--- lxml/trunk/doc/mkhtml.py (original)
+++ lxml/trunk/doc/mkhtml.py Wed Dec 19 12:34:33 2007
@@ -20,13 +20,13 @@
])
find_title = XPath("/h:html/h:head/h:title/text()",
- {"h" : "http://www.w3.org/1999/xhtml"})
+ namespaces={"h" : "http://www.w3.org/1999/xhtml"})
find_headings = XPath("//h:h1[not(@class)]/h:a/text()",
- {"h" : "http://www.w3.org/1999/xhtml"})
+ namespaces={"h" : "http://www.w3.org/1999/xhtml"})
find_menu = XPath("//h:ul[@id=$name]",
- {"h" : "http://www.w3.org/1999/xhtml"})
+ namespaces={"h" : "http://www.w3.org/1999/xhtml"})
find_page_end = XPath("/h:html/h:body/h:div[last()]",
- {"h" : "http://www.w3.org/1999/xhtml"})
+ namespaces={"h" : "http://www.w3.org/1999/xhtml"})
replace_invalid = re.compile(r'[-_/.\s\\]').sub
From lxml-checkins at codespeak.net Wed Dec 19 17:38:57 2007
From: lxml-checkins at codespeak.net (VIAGRA ® Official Site)
Date: Wed, 19 Dec 2007 17:38:57 +0100 (CET)
Subject: [Lxml-checkins] December 76% OFF
Message-ID: <20071219103911.7621.qmail@ppp85-141-227-102.pppoe.mtu-net.ru>
An HTML attachment was scrubbed...
URL: http://codespeak.net/pipermail/lxml-checkins/attachments/20071219/2000cd2b/attachment.htm
From scoder at codespeak.net Thu Dec 20 11:35:04 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Thu, 20 Dec 2007 11:35:04 +0100 (CET)
Subject: [Lxml-checkins] r49948 - in lxml/trunk: . doc
Message-ID: <20071220103504.344DC169E2A@codespeak.net>
Author: scoder
Date: Thu Dec 20 11:35:02 2007
New Revision: 49948
Modified:
lxml/trunk/ (props changed)
lxml/trunk/doc/FAQ.txt
Log:
r3149 at delle: sbehnel | 2007-12-19 16:37:14 +0100
replaced Pyrex by Cython in docs
Modified: lxml/trunk/doc/FAQ.txt
==============================================================================
--- lxml/trunk/doc/FAQ.txt (original)
+++ lxml/trunk/doc/FAQ.txt Thu Dec 20 11:35:02 2007
@@ -240,7 +240,7 @@
the eggs can only support the one they were compiled with.
This means that you have to compile lxml from sources for your system. Note
-that you do not need Pyrex for this, the lxml source distribution is directly
+that you do not need Cython for this, the lxml source distribution is directly
compilable on both platform types. See the `build instructions`_ on how to do
this.
@@ -257,9 +257,9 @@
the C-level is required for performance reasons.
To avoid writing plain C-code and caring too much about the details of
-built-in types and reference counting, lxml is written in Pyrex_, a
+built-in types and reference counting, lxml is written in Cython_, a
Python-like language that is translated into C-code. Chances are that if you
-know Python, you can write `code that Pyrex accepts`_. Again, the C-ish style
+know Python, you can write `code that Cython accepts`_. Again, the C-ish style
used in the lxml code is just for performance optimisations. If you want to
contribute, don't bother with the details, a Python implementation of your
contribution is better than none. And keep in mind that lxml's flexible API
@@ -269,8 +269,8 @@
Please contact the `mailing list`_ if you need any help.
-.. _Pyrex: http://www.cosc.canterbury.ac.nz/greg.ewing/python/Pyrex/
-.. _`code that Pyrex accepts`: http://www.cosc.canterbury.ac.nz/greg.ewing/python/Pyrex/version/Doc/overview.html
+.. _Cython: http://www.cython.org/
+.. _`code that Cython accepts`: http://www.cosc.canterbury.ac.nz/greg.ewing/python/Pyrex/version/Doc/overview.html
How can I contribute?
From scoder at codespeak.net Thu Dec 20 11:35:07 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Thu, 20 Dec 2007 11:35:07 +0100 (CET)
Subject: [Lxml-checkins] r49949 - in lxml/trunk: . doc
Message-ID: <20071220103507.8E54E169E36@codespeak.net>
Author: scoder
Date: Thu Dec 20 11:35:07 2007
New Revision: 49949
Modified:
lxml/trunk/ (props changed)
lxml/trunk/doc/FAQ.txt
Log:
r3150 at delle: sbehnel | 2007-12-20 11:34:35 +0100
FAQ fix
Modified: lxml/trunk/doc/FAQ.txt
==============================================================================
--- lxml/trunk/doc/FAQ.txt (original)
+++ lxml/trunk/doc/FAQ.txt Thu Dec 20 11:35:07 2007
@@ -394,12 +394,11 @@
from disk and memory, as long as you use either the default parser (which is
replicated for each thread) or create a parser for each thread yourself. lxml
also allows concurrency during validation (RelaxNG and XMLSchema) and XSL
-transformation. You can share RelaxNG, XMLSchema and XSLT objects between
-threads. While you can also share parsers between threads, this will
-serialize the access to each of them, so it is better to copy() parsers or to
-use the default parser. Note that access to the XML() and HTML() functions is
-always serialized. If you need to parse concurrently from strings, use
-``parse()`` with ``StringIO`` or pass a separate parser to these functions.
+transformation. You can share RelaxNG, XMLSchema and (with restrictions) XSLT
+objects between threads. While you can also share parsers between threads,
+this will serialize the access to each of them, so it is better to ``copy()``
+parsers or to just use the default parser (which is automatically copied for
+each thread).
Due to the way libxslt handles threading, concurrent access to stylesheets is
currently only possible if it was parsed in the main thread. Parsing and
From scoder at codespeak.net Thu Dec 20 11:35:11 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Thu, 20 Dec 2007 11:35:11 +0100 (CET)
Subject: [Lxml-checkins] r49950 - in lxml/trunk: . benchmark
Message-ID: <20071220103511.619B9169E24@codespeak.net>
Author: scoder
Date: Thu Dec 20 11:35:10 2007
New Revision: 49950
Modified:
lxml/trunk/ (props changed)
lxml/trunk/benchmark/bench_etree.py
Log:
r3151 at delle: sbehnel | 2007-12-20 11:34:50 +0100
API usage fix
Modified: lxml/trunk/benchmark/bench_etree.py
==============================================================================
--- lxml/trunk/benchmark/bench_etree.py (original)
+++ lxml/trunk/benchmark/bench_etree.py Thu Dec 20 11:35:10 2007
@@ -37,24 +37,24 @@
@with_attributes(True, False)
@with_text(text=True, utext=True)
def bench_tostring_utf8(self, root):
- self.etree.tostring(root, 'UTF-8')
+ self.etree.tostring(root, encoding='UTF-8')
@with_attributes(True, False)
@with_text(text=True, utext=True)
def bench_tostring_utf16(self, root):
- self.etree.tostring(root, 'UTF-16')
+ self.etree.tostring(root, encoding='UTF-16')
@with_attributes(True, False)
@with_text(text=True, utext=True)
def bench_tostring_utf8_unicode_XML(self, root):
- xml = unicode(self.etree.tostring(root, 'UTF-8'), 'UTF-8')
+ xml = unicode(self.etree.tostring(root, encoding='UTF-8'), 'UTF-8')
self.etree.XML(xml)
@with_attributes(True, False)
@with_text(text=True, utext=True)
def bench_write_utf8_parse_stringIO(self, root):
f = StringIO()
- self.etree.ElementTree(root).write(f, 'UTF-8')
+ self.etree.ElementTree(root).write(f, encoding='UTF-8')
f.seek(0)
self.etree.parse(f)
From scoder at codespeak.net Thu Dec 20 17:32:00 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Thu, 20 Dec 2007 17:32:00 +0100 (CET)
Subject: [Lxml-checkins] r49952 - in lxml/trunk: . src/lxml
Message-ID: <20071220163200.ED277168465@codespeak.net>
Author: scoder
Date: Thu Dec 20 17:31:58 2007
New Revision: 49952
Modified:
lxml/trunk/ (props changed)
lxml/trunk/src/lxml/lxml.etree.pyx
Log:
r3155 at delle: sbehnel | 2007-12-20 13:30:48 +0100
doc fix
Modified: lxml/trunk/src/lxml/lxml.etree.pyx
==============================================================================
--- lxml/trunk/src/lxml/lxml.etree.pyx (original)
+++ lxml/trunk/src/lxml/lxml.etree.pyx Thu Dec 20 17:31:58 2007
@@ -1950,7 +1950,7 @@
tree it traverses is modified during iteration.
"""
# we keep Python references here to control GC
- # keep next node to return and a depth counter in the tree
+ # keep next node to return and the (s)top node
cdef _Element _next_node
cdef _Element _top_node
def __init__(self, _Element node not None, tag=None, *, inclusive=True):
From scoder at codespeak.net Thu Dec 20 17:32:04 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Thu, 20 Dec 2007 17:32:04 +0100 (CET)
Subject: [Lxml-checkins] r49953 - in lxml/trunk: . benchmark
Message-ID: <20071220163204.8DE711684F2@codespeak.net>
Author: scoder
Date: Thu Dec 20 17:32:03 2007
New Revision: 49953
Modified:
lxml/trunk/ (props changed)
lxml/trunk/benchmark/benchbase.py
Log:
r3156 at delle: sbehnel | 2007-12-20 13:31:26 +0100
prefer locally installed ET/cET in benchmark imports
Modified: lxml/trunk/benchmark/benchbase.py
==============================================================================
--- lxml/trunk/benchmark/benchbase.py (original)
+++ lxml/trunk/benchmark/benchbase.py Thu Dec 20 17:32:03 2007
@@ -458,11 +458,11 @@
except ValueError:
pass
try:
- import xml.etree.cElementTree as cET
+ import cElementTree as cET
_etrees.append(cET)
except ImportError:
try:
- import cElementTree as cET
+ import xml.etree.cElementTree as cET
_etrees.append(cET)
except ImportError:
pass
@@ -474,11 +474,11 @@
pass
else:
try:
- from xml.etree import ElementTree as ET
+ from elementtree import ElementTree as ET
_etrees.append(ET)
except ImportError:
try:
- from elementtree import ElementTree as ET
+ from xml.etree import ElementTree as ET
_etrees.append(ET)
except ImportError:
pass
From scoder at codespeak.net Thu Dec 20 17:32:07 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Thu, 20 Dec 2007 17:32:07 +0100 (CET)
Subject: [Lxml-checkins] r49954 - in lxml/trunk: . src/lxml
Message-ID: <20071220163207.D83D11684F2@codespeak.net>
Author: scoder
Date: Thu Dec 20 17:32:07 2007
New Revision: 49954
Modified:
lxml/trunk/ (props changed)
lxml/trunk/src/lxml/lxml.etree.pyx
Log:
r3157 at delle: sbehnel | 2007-12-20 13:36:39 +0100
fix: release lock in corner case
Modified: lxml/trunk/src/lxml/lxml.etree.pyx
==============================================================================
--- lxml/trunk/src/lxml/lxml.etree.pyx (original)
+++ lxml/trunk/src/lxml/lxml.etree.pyx Thu Dec 20 17:32:07 2007
@@ -1232,6 +1232,8 @@
result = element_class()
if hasProxy(c_node):
# prevent re-entry race condition - we just called into Python
+ if config.ENABLE_THREADING:
+ python.PyThread_release_lock(ELEMENT_CREATION_LOCK)
result._c_node = NULL
return getProxy(c_node)
result._doc = doc
From scoder at codespeak.net Thu Dec 20 17:32:10 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Thu, 20 Dec 2007 17:32:10 +0100 (CET)
Subject: [Lxml-checkins] r49955 - in lxml/trunk: . benchmark
Message-ID: <20071220163210.F0C0B168521@codespeak.net>
Author: scoder
Date: Thu Dec 20 17:32:10 2007
New Revision: 49955
Modified:
lxml/trunk/ (props changed)
lxml/trunk/benchmark/bench_xpath.py
Log:
r3158 at delle: sbehnel | 2007-12-20 17:28:08 +0100
API usage fix
Modified: lxml/trunk/benchmark/bench_xpath.py
==============================================================================
--- lxml/trunk/benchmark/bench_xpath.py (original)
+++ lxml/trunk/benchmark/bench_xpath.py Thu Dec 20 17:32:10 2007
@@ -61,7 +61,7 @@
self.etree.FunctionNamespace("testns")["t"] = return_child
try:
- xpath = self.etree.XPath("test:t(.)", {"test":"testns"})
+ xpath = self.etree.XPath("test:t(.)", namespaces={"test":"testns"})
for child in children:
xpath(child)
finally:
From scoder at codespeak.net Thu Dec 20 17:32:14 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Thu, 20 Dec 2007 17:32:14 +0100 (CET)
Subject: [Lxml-checkins] r49956 - in lxml/trunk: . doc
Message-ID: <20071220163214.91A5216852D@codespeak.net>
Author: scoder
Date: Thu Dec 20 17:32:14 2007
New Revision: 49956
Modified:
lxml/trunk/ (props changed)
lxml/trunk/doc/performance.txt
Log:
r3159 at delle: sbehnel | 2007-12-20 17:31:49 +0100
updated benchmark results for lxml 2.0
Modified: lxml/trunk/doc/performance.txt
==============================================================================
--- lxml/trunk/doc/performance.txt (original)
+++ lxml/trunk/doc/performance.txt Thu Dec 20 17:32:14 2007
@@ -66,10 +66,12 @@
a specific part of the API yourself, please consider sending it to the lxml
mailing list.
-The timings cited below compare lxml 1.3 (with libxml2 2.6.27) to the
-ElementTree and cElementTree versions shipped with CPython 2.5 (based on
-ElementTree 1.2.6). They were run single-threaded on a 1.8GHz Intel Core Duo
-machine under Ubuntu Linux 7.04 (Feisty).
+The timings cited below compare lxml 2.0alpha (with libxml2 2.6.30) to
+the December 2007 SVN trunk versions of ElementTree (1.3) and
+cElementTree (1.2.7). They were run single-threaded on a 1.8GHz Intel
+Core Duo machine under Ubuntu Linux 7.10 (Gutsy). The C libraries
+were compiled with the same platform specific optimisation flags. The
+Python interpreter (2.5.1) was used as provided by the distribution.
.. _`bench_etree.py`: http://codespeak.net/svn/lxml/branch/lxml-1.3/benchmark/bench_etree.py
.. _`bench_xpath.py`: http://codespeak.net/svn/lxml/branch/lxml-1.3/benchmark/bench_xpath.py
@@ -103,73 +105,88 @@
Parsing and Serialising
=======================
-These are areas where lxml excels. The reason is that both parts are executed
-entirely at the C level, without major interaction with Python code. The
-results are rather impressive. Compared to cElementTree, lxml is about 20 to
-40 times faster on serialisation::
-
- lxe: tostring_utf16 (SATR T1) 21.9206 msec/pass
- cET: tostring_utf16 (SATR T1) 461.9428 msec/pass
- ET : tostring_utf16 (SATR T1) 486.8946 msec/pass
-
- lxe: tostring_utf16 (UATR T1) 22.7508 msec/pass
- cET: tostring_utf16 (UATR T1) 526.3446 msec/pass
- ET : tostring_utf16 (UATR T1) 496.0767 msec/pass
-
- lxe: tostring_utf16 (S-TR T2) 23.8452 msec/pass
- cET: tostring_utf16 (S-TR T2) 537.9200 msec/pass
- ET : tostring_utf16 (S-TR T2) 504.4273 msec/pass
-
- lxe: tostring_utf8 (S-TR T2) 18.2550 msec/pass
- cET: tostring_utf8 (S-TR T2) 528.3908 msec/pass
- ET : tostring_utf8 (S-TR T2) 549.7071 msec/pass
-
- lxe: tostring_utf8 (U-TR T3) 2.5497 msec/pass
- cET: tostring_utf8 (U-TR T3) 49.8495 msec/pass
- ET : tostring_utf8 (U-TR T3) 62.6927 msec/pass
-
-For parsing, the difference between the libraries is smaller. The (c)ET
-libraries use the expat parser, which is known to be extremely fast::
-
- lxe: parse_stringIO (SAXR T1) 150.2380 msec/pass
- cET: parse_stringIO (SAXR T1) 25.9311 msec/pass
- ET : parse_stringIO (SAXR T1) 222.9431 msec/pass
-
- lxe: parse_stringIO (S-XR T3) 5.9490 msec/pass
- cET: parse_stringIO (S-XR T3) 5.4519 msec/pass
- ET : parse_stringIO (S-XR T3) 76.4120 msec/pass
-
- lxe: parse_stringIO (UAXR T3) 29.3601 msec/pass
- cET: parse_stringIO (UAXR T3) 28.9941 msec/pass
- ET : parse_stringIO (UAXR T3) 163.5361 msec/pass
-
-The expat parser allows cET to be up to 80% faster than lxml on plain parser
-performance. Similar timings can be observed for the ``iterparse()``
-function. However, if you take a complete input-output cycle, the numbers
-will look similar to these::
-
- lxe: write_utf8_parse_stringIO (S-TR T1) 166.3210 msec/pass
- cET: write_utf8_parse_stringIO (S-TR T1) 581.2099 msec/pass
- ET : write_utf8_parse_stringIO (S-TR T1) 803.5331 msec/pass
-
- lxe: write_utf8_parse_stringIO (UATR T2) 184.4249 msec/pass
- cET: write_utf8_parse_stringIO (UATR T2) 671.5119 msec/pass
- ET : write_utf8_parse_stringIO (UATR T2) 924.3481 msec/pass
-
- lxe: write_utf8_parse_stringIO (S-TR T3) 9.1329 msec/pass
- cET: write_utf8_parse_stringIO (S-TR T3) 77.9850 msec/pass
- ET : write_utf8_parse_stringIO (S-TR T3) 157.0492 msec/pass
-
- lxe: write_utf8_parse_stringIO (SATR T4) 1.3900 msec/pass
- cET: write_utf8_parse_stringIO (SATR T4) 12.6081 msec/pass
- ET : write_utf8_parse_stringIO (SATR T4) 16.2580 msec/pass
+Serialisation is an area where lxml excels. The reason is that it
+executes entirely at the C level, without any interaction with Python
+code. The results are rather impressive, especially for UTF-8, which
+is native to libxml2. While 20 to 40 times faster than (c)ElementTree
+1.2, lxml is still more than 5 times as fast as the much improved
+ElementTree 1.3::
+
+ lxe: tostring_utf16 (SATR T1) 23.4821 msec/pass
+ cET: tostring_utf16 (SATR T1) 129.8430 msec/pass
+ ET : tostring_utf16 (SATR T1) 136.1301 msec/pass
+
+ lxe: tostring_utf16 (UATR T1) 23.4859 msec/pass
+ cET: tostring_utf16 (UATR T1) 130.1570 msec/pass
+ ET : tostring_utf16 (UATR T1) 136.3101 msec/pass
+
+ lxe: tostring_utf16 (S-TR T2) 24.2729 msec/pass
+ cET: tostring_utf16 (S-TR T2) 136.9388 msec/pass
+ ET : tostring_utf16 (S-TR T2) 143.9550 msec/pass
+
+ lxe: tostring_utf8 (S-TR T2) 18.4860 msec/pass
+ cET: tostring_utf8 (S-TR T2) 137.0859 msec/pass
+ ET : tostring_utf8 (S-TR T2) 144.3110 msec/pass
+
+ lxe: tostring_utf8 (U-TR T3) 2.7399 msec/pass
+ cET: tostring_utf8 (U-TR T3) 52.1040 msec/pass
+ ET : tostring_utf8 (U-TR T3) 53.1070 msec/pass
+
+For parsing, on the other hand, the advantage is clearly with
+cElementTree. The (c)ET libraries use a very thin layer on top of the
+expat parser, which is known to be extremely fast::
+
+ lxe: parse_stringIO (SAXR T1) 144.1851 msec/pass
+ cET: parse_stringIO (SAXR T1) 14.4269 msec/pass
+ ET : parse_stringIO (SAXR T1) 245.9190 msec/pass
+
+ lxe: parse_stringIO (S-XR T3) 5.6100 msec/pass
+ cET: parse_stringIO (S-XR T3) 5.3229 msec/pass
+ ET : parse_stringIO (S-XR T3) 82.4831 msec/pass
+
+ lxe: parse_stringIO (UAXR T3) 23.4420 msec/pass
+ cET: parse_stringIO (UAXR T3) 30.2689 msec/pass
+ ET : parse_stringIO (UAXR T3) 165.7169 msec/pass
+
+While both are about as fast for smaller documents, the expat parser
+allows cET to be up to 10 times faster than lxml on plain parser
+performance for large input documents. Similar timings can be observed
+for the ``iterparse()`` function::
+
+ lxe: iterparse_stringIO (SAXR T1) 160.3689 msec/pass
+ cET: iterparse_stringIO (SAXR T1) 19.1891 msec/pass
+ ET : iterparse_stringIO (SAXR T1) 274.8971 msec/pass
+
+ lxe: iterparse_stringIO (UAXR T3) 24.9629 msec/pass
+ cET: iterparse_stringIO (UAXR T3) 31.7740 msec/pass
+ ET : iterparse_stringIO (UAXR T3) 173.8000 msec/pass
+
+However, if you benchmark the complete round-trip of a serialise-parse
+cycle, the numbers will look similar to these::
+
+ lxe: write_utf8_parse_stringIO (S-TR T1) 160.0718 msec/pass
+ cET: write_utf8_parse_stringIO (S-TR T1) 207.6778 msec/pass
+ ET : write_utf8_parse_stringIO (S-TR T1) 450.2120 msec/pass
+
+ lxe: write_utf8_parse_stringIO (UATR T2) 173.5830 msec/pass
+ cET: write_utf8_parse_stringIO (UATR T2) 253.0849 msec/pass
+ ET : write_utf8_parse_stringIO (UATR T2) 519.2261 msec/pass
+
+ lxe: write_utf8_parse_stringIO (S-TR T3) 8.4269 msec/pass
+ cET: write_utf8_parse_stringIO (S-TR T3) 75.7639 msec/pass
+ ET : write_utf8_parse_stringIO (S-TR T3) 156.1930 msec/pass
+
+ lxe: write_utf8_parse_stringIO (SATR T4) 1.2100 msec/pass
+ cET: write_utf8_parse_stringIO (SATR T4) 6.4859 msec/pass
+ ET : write_utf8_parse_stringIO (SATR T4) 9.9051 msec/pass
For applications that require a high parser throughput and do little
-serialization, cET is the best choice. Also for iterparse applications that
-extract small amounts of data from large XML data sets. If it comes to
-round-trip performance, however, lxml tends to be 3-4 times faster in
-total. So, whenever the input documents are not considerably bigger than the
-output, lxml is the clear winner.
+serialization, cET is the best choice. The same applies to iterparse
+applications that extract small amounts of data from large XML data
+sets. When it comes to round-trip performance, however, lxml tends to
+be anywhere from 30% to several times faster in total. So, whenever the
+input documents are not considerably bigger than the output, lxml is
+the clear winner.
The ElementTree API
@@ -182,23 +199,23 @@
restructuring. This can be seen from the tree setup times of the benchmark
(given in seconds)::
- lxe: -- S- U- -A SA UA
- T1: 0.1181 0.1080 0.1074 0.1088 0.1087 0.1099
- T2: 0.1103 0.1109 0.1164 0.1241 0.1203 0.1231
- T3: 0.0297 0.0309 0.0297 0.0716 0.0704 0.0703
- T4: 0.0005 0.0004 0.0004 0.0014 0.0014 0.0014
- cET: -- S- U- -A SA UA
- T1: 0.0290 0.0271 0.0275 0.0297 0.0273 0.0274
- T2: 0.0280 0.0280 0.0281 0.0285 0.0283 0.0286
- T3: 0.0071 0.0072 0.0071 0.0113 0.0096 0.0096
+ lxe: -- S- U- -A SA UA
+ T1: 0.0914 0.0875 0.0872 0.0892 0.0882 0.0900
+ T2: 0.0894 0.0897 0.0892 0.0988 0.0978 0.0974
+ T3: 0.0219 0.0194 0.0189 0.0570 0.0570 0.0573
+ T4: 0.0004 0.0004 0.0004 0.0012 0.0013 0.0012
+ cET: -- S- U- -A SA UA
+ T1: 0.0272 0.0264 0.0267 0.0268 0.0261 0.0265
+ T2: 0.0280 0.0274 0.0273 0.0273 0.0276 0.0275
+ T3: 0.0065 0.0066 0.0065 0.0111 0.0088 0.0088
T4: 0.0001 0.0001 0.0001 0.0001 0.0001 0.0001
- ET : -- S- U- -A SA UA
- T1: 0.1362 0.1985 0.2300 0.1344 0.2672 0.1335
- T2: 0.3107 0.1386 0.3581 0.3886 0.1388 0.4277
- T3: 0.0334 0.0332 0.0320 0.0367 0.3769 0.0375
- T4: 0.0006 0.0005 0.0008 0.0007 0.0007 0.0006
+ ET : -- S- U- -A SA UA
+ T1: 0.1302 0.1903 0.2208 0.1265 0.2542 0.1267
+ T2: 0.2994 0.1301 0.3402 0.3746 0.1326 0.4170
+ T3: 0.0301 0.0310 0.0302 0.0348 0.3654 0.0349
+ T4: 0.0006 0.0005 0.0008 0.0006 0.0007 0.0006
-While lxml is still faster than ET in most cases (30-60%), cET can be up to
+While lxml is still faster than ET in most cases (10-70%), cET can be up to
three times faster than lxml here. One of the reasons is that lxml must
additionally discard the created Python elements after their use, when they
are no longer referenced. ET and cET represent the tree itself through these
@@ -208,36 +225,41 @@
Child access
------------
-The same reason makes operations like ``getchildren()`` more costly in lxml.
-Where ET and cET can quickly create a shallow copy of their list of children,
-lxml has to create a Python object for each child and collect them in a list::
-
- lxe: root_getchildren (--TR T2) 0.1960 msec/pass
- cET: root_getchildren (--TR T2) 0.0150 msec/pass
- ET : root_getchildren (--TR T2) 0.0091 msec/pass
-
-When accessing single children, however, e.g. by index, this handicap is
-negligible::
-
- lxe: first_child (--TR T2) 0.2289 msec/pass
- cET: first_child (--TR T2) 0.2048 msec/pass
- ET : first_child (--TR T2) 0.9291 msec/pass
-
- lxe: last_child (--TR T1) 0.2310 msec/pass
- cET: last_child (--TR T1) 0.2148 msec/pass
- ET : last_child (--TR T1) 0.9191 msec/pass
-
-... unless you add the time to find a child index in a bigger list, as ET and
-cET use Python lists here, which are based on arrays. The data structure used
-by libxml2 is a linked tree, and thus, a linked list of children::
-
- lxe: middle_child (--TR T1) 0.2759 msec/pass
- cET: middle_child (--TR T1) 0.2069 msec/pass
- ET : middle_child (--TR T1) 0.9291 msec/pass
-
- lxe: middle_child (--TR T2) 1.7111 msec/pass
- cET: middle_child (--TR T2) 0.2089 msec/pass
- ET : middle_child (--TR T2) 0.9360 msec/pass
+The same reason makes operations like collecting children as in
+``list(element)`` more costly in lxml. Where ET and cET can quickly
+create a shallow copy of their list of children, lxml has to create a
+Python object for each child and collect them in a list::
+
+ lxe: root_list_children (--TR T1) 0.0169 msec/pass
+ cET: root_list_children (--TR T1) 0.0081 msec/pass
+ ET : root_list_children (--TR T1) 0.0541 msec/pass
+
+ lxe: root_list_children (--TR T2) 0.2339 msec/pass
+ cET: root_list_children (--TR T2) 0.0319 msec/pass
+ ET : root_list_children (--TR T2) 0.4420 msec/pass
+
+This handicap is also visible when accessing single children::
+
+ lxe: first_child (--TR T2) 0.3228 msec/pass
+ cET: first_child (--TR T2) 0.2170 msec/pass
+ ET : first_child (--TR T2) 0.9968 msec/pass
+
+ lxe: last_child (--TR T1) 0.3269 msec/pass
+ cET: last_child (--TR T1) 0.2291 msec/pass
+ ET : last_child (--TR T1) 0.9830 msec/pass
+
+... and it becomes larger when you also add the time to find a child
+index in a bigger list. ET and cET use Python lists here, which are
+based on arrays. The data structure used by libxml2 is a linked tree,
+and thus, a linked list of children::
+
+ lxe: middle_child (--TR T1) 0.3638 msec/pass
+ cET: middle_child (--TR T1) 0.2229 msec/pass
+ ET : middle_child (--TR T1) 1.0030 msec/pass
+
+ lxe: middle_child (--TR T2) 2.1780 msec/pass
+ cET: middle_child (--TR T2) 0.2229 msec/pass
+ ET : middle_child (--TR T2) 0.9930 msec/pass
Element creation
@@ -247,21 +269,21 @@
in. This results in a major performance difference for creating independent
Elements that end up in independently created documents::
- lxe: create_elements (--TC T2) 3.7301 msec/pass
- cET: create_elements (--TC T2) 0.1960 msec/pass
- ET : create_elements (--TC T2) 1.4279 msec/pass
+ lxe: create_elements (--TC T2) 3.1691 msec/pass
+ cET: create_elements (--TC T2) 0.1929 msec/pass
+ ET : create_elements (--TC T2) 1.3590 msec/pass
Therefore, it is always preferable to create Elements for the document they
are supposed to end up in, either as SubElements of an Element or using the
explicit ``Element.makeelement()`` call::
- lxe: makeelement (--TC T2) 2.3680 msec/pass
- cET: makeelement (--TC T2) 0.3128 msec/pass
- ET : makeelement (--TC T2) 1.6940 msec/pass
-
- lxe: create_subelements (--TC T2) 2.2051 msec/pass
- cET: create_subelements (--TC T2) 0.2370 msec/pass
- ET : create_subelements (--TC T2) 3.2189 msec/pass
+ lxe: makeelement (--TC T2) 2.2941 msec/pass
+ cET: makeelement (--TC T2) 0.3211 msec/pass
+ ET : makeelement (--TC T2) 1.6358 msec/pass
+
+ lxe: create_subelements (--TC T2) 2.1169 msec/pass
+ cET: create_subelements (--TC T2) 0.2351 msec/pass
+ ET : create_subelements (--TC T2) 3.2270 msec/pass
So, if the main performance bottleneck of an application is creating large XML
trees in memory through calls to Element and SubElement, cET is the best
@@ -278,13 +300,13 @@
The following benchmark appends all root children of the second tree to the
root of the first tree::
- lxe: append_from_document (--TR T1,T2) 4.3468 msec/pass
- cET: append_from_document (--TR T1,T2) 0.2608 msec/pass
- ET : append_from_document (--TR T1,T2) 1.2310 msec/pass
-
- lxe: append_from_document (--TR T3,T4) 0.0679 msec/pass
- cET: append_from_document (--TR T3,T4) 0.0148 msec/pass
- ET : append_from_document (--TR T3,T4) 0.0880 msec/pass
+ lxe: append_from_document (--TR T1,T2) 3.8681 msec/pass
+ cET: append_from_document (--TR T1,T2) 0.2699 msec/pass
+ ET : append_from_document (--TR T1,T2) 1.2650 msec/pass
+
+ lxe: append_from_document (--TR T3,T4) 0.0570 msec/pass
+ cET: append_from_document (--TR T3,T4) 0.0169 msec/pass
+ ET : append_from_document (--TR T3,T4) 0.0820 msec/pass
Although these are fairly small numbers compared to parsing, this easily shows
the different performance classes for lxml and (c)ET. Where the latter do not
@@ -295,15 +317,22 @@
This difference is not always as visible, but applies to most parts of the
API, like inserting newly created elements::
- lxe: insert_from_document (--TR T1,T2) 6.3150 msec/pass
- cET: insert_from_document (--TR T1,T2) 0.4039 msec/pass
- ET : insert_from_document (--TR T1,T2) 1.4770 msec/pass
+ lxe: insert_from_document (--TR T1,T2) 5.8019 msec/pass
+ cET: insert_from_document (--TR T1,T2) 0.4041 msec/pass
+ ET : insert_from_document (--TR T1,T2) 1.4789 msec/pass
-Or replacing the child slice by a new element::
+or replacing the child slice by a newly created element::
- lxe: replace_children_element (--TC T1) 0.2608 msec/pass
+ lxe: replace_children_element (--TC T1) 0.2520 msec/pass
cET: replace_children_element (--TC T1) 0.0238 msec/pass
- ET : replace_children_element (--TC T1) 0.1628 msec/pass
+ ET : replace_children_element (--TC T1) 0.1600 msec/pass
+
+as opposed to replacing the slice with an existing element from the
+same document::
+
+ lxe: replace_children (--TC T1) 0.0188 msec/pass
+ cET: replace_children (--TC T1) 0.0119 msec/pass
+ ET : replace_children (--TC T1) 0.0739 msec/pass
You should keep this difference in mind when you merge very large trees.
@@ -313,17 +342,17 @@
Deep copying a tree is fast in lxml::
- lxe: deepcopy_all (--TR T1) 11.0400 msec/pass
- cET: deepcopy_all (--TR T1) 119.6141 msec/pass
- ET : deepcopy_all (--TR T1) 451.2160 msec/pass
-
- lxe: deepcopy_all (-ATR T2) 13.5410 msec/pass
- cET: deepcopy_all (-ATR T2) 135.2482 msec/pass
- ET : deepcopy_all (-ATR T2) 476.1350 msec/pass
-
- lxe: deepcopy_all (S-TR T3) 4.2889 msec/pass
- cET: deepcopy_all (S-TR T3) 36.0429 msec/pass
- ET : deepcopy_all (S-TR T3) 113.4322 msec/pass
+ lxe: deepcopy_all (--TR T1) 10.9420 msec/pass
+ cET: deepcopy_all (--TR T1) 120.6188 msec/pass
+ ET : deepcopy_all (--TR T1) 902.6880 msec/pass
+
+ lxe: deepcopy_all (-ATR T2) 12.5830 msec/pass
+ cET: deepcopy_all (-ATR T2) 136.9810 msec/pass
+ ET : deepcopy_all (-ATR T2) 944.2801 msec/pass
+
+ lxe: deepcopy_all (S-TR T3) 4.1170 msec/pass
+ cET: deepcopy_all (S-TR T3) 36.1221 msec/pass
+ ET : deepcopy_all (S-TR T3) 221.6041 msec/pass
So, for example, if you have a database-like scenario where you parse in a
large tree and then search and copy independent subtrees from it for further
@@ -338,39 +367,39 @@
especially if few elements are of interest or the target element tag name is
known, lxml is a good choice::
- lxe: getiterator_all (--TR T2) 6.4790 msec/pass
- cET: getiterator_all (--TR T2) 28.2831 msec/pass
- ET : getiterator_all (--TR T2) 26.0720 msec/pass
-
- lxe: getiterator_islice (--TR T2) 0.0892 msec/pass
- cET: getiterator_islice (--TR T2) 0.2460 msec/pass
- ET : getiterator_islice (--TR T2) 26.6550 msec/pass
-
- lxe: getiterator_tag (--TR T2) 0.3850 msec/pass
- cET: getiterator_tag (--TR T2) 9.3720 msec/pass
- ET : getiterator_tag (--TR T2) 22.8221 msec/pass
-
- lxe: getiterator_tag_all (--TR T2) 0.7222 msec/pass
- cET: getiterator_tag_all (--TR T2) 27.2939 msec/pass
- ET : getiterator_tag_all (--TR T2) 22.8271 msec/pass
+ lxe: getiterator_all (--TR T1) 6.0360 msec/pass
+ cET: getiterator_all (--TR T1) 39.9489 msec/pass
+ ET : getiterator_all (--TR T1) 23.0000 msec/pass
+
+ lxe: getiterator_islice (--TR T2) 0.0851 msec/pass
+ cET: getiterator_islice (--TR T2) 0.3440 msec/pass
+ ET : getiterator_islice (--TR T2) 0.2429 msec/pass
+
+ lxe: getiterator_tag (--TR T2) 0.3290 msec/pass
+ cET: getiterator_tag (--TR T2) 14.1001 msec/pass
+ ET : getiterator_tag (--TR T2) 7.4241 msec/pass
+
+ lxe: getiterator_tag_all (--TR T2) 0.7281 msec/pass
+ cET: getiterator_tag_all (--TR T2) 40.7901 msec/pass
+ ET : getiterator_tag_all (--TR T2) 21.0390 msec/pass
This translates directly into similar timings for ``Element.findall()``::
- lxe: findall (--TR T2) 6.8321 msec/pass
- cET: findall (--TR T2) 28.8639 msec/pass
- ET : findall (--TR T2) 27.1060 msec/pass
-
- lxe: findall (--TR T3) 1.3590 msec/pass
- cET: findall (--TR T3) 8.9881 msec/pass
- ET : findall (--TR T3) 6.4890 msec/pass
-
- lxe: findall_tag (--TR T2) 0.9229 msec/pass
- cET: findall_tag (--TR T2) 27.2651 msec/pass
- ET : findall_tag (--TR T2) 22.7208 msec/pass
-
- lxe: findall_tag (--TR T3) 0.1700 msec/pass
- cET: findall_tag (--TR T3) 6.4540 msec/pass
- ET : findall_tag (--TR T3) 5.4770 msec/pass
+ lxe: findall (--TR T2) 8.2440 msec/pass
+ cET: findall (--TR T2) 44.5340 msec/pass
+ ET : findall (--TR T2) 27.1149 msec/pass
+
+ lxe: findall (--TR T3) 1.7269 msec/pass
+ cET: findall (--TR T3) 12.9611 msec/pass
+ ET : findall (--TR T3) 8.6131 msec/pass
+
+ lxe: findall_tag (--TR T2) 0.8020 msec/pass
+ cET: findall_tag (--TR T2) 40.6358 msec/pass
+ ET : findall_tag (--TR T2) 21.4581 msec/pass
+
+ lxe: findall_tag (--TR T3) 0.2341 msec/pass
+ cET: findall_tag (--TR T3) 9.6831 msec/pass
+ ET : findall_tag (--TR T3) 5.2109 msec/pass
Note that all three libraries currently use the same Python implementation for
``findall()``, except for their native tree iterator.
@@ -386,49 +415,52 @@
of the lxml API you use. The most straight forward way is to call the
``xpath()`` method on an Element or ElementTree::
- lxe: xpath_method (--TC T1) 1.0180 msec/pass
- lxe: xpath_method (--TC T2) 20.3521 msec/pass
- lxe: xpath_method (--TC T3) 0.1259 msec/pass
- lxe: xpath_method (--TC T4) 1.0169 msec/pass
+ lxe: xpath_method (--TC T1) 1.8251 msec/pass
+ lxe: xpath_method (--TC T2) 23.3159 msec/pass
+ lxe: xpath_method (--TC T3) 0.1378 msec/pass
+ lxe: xpath_method (--TC T4) 1.1270 msec/pass
This is well suited for testing and when the XPath expressions are as diverse
as the trees they are called on. However, if you have a single XPath
expression that you want to apply to a larger number of different elements,
the ``XPath`` class is the most efficient way to do it::
- lxe: xpath_class (--TC T1) 0.1891 msec/pass
- lxe: xpath_class (--TC T2) 3.0179 msec/pass
- lxe: xpath_class (--TC T3) 0.0570 msec/pass
- lxe: xpath_class (--TC T4) 0.1910 msec/pass
+ lxe: xpath_class (--TC T1) 0.6981 msec/pass
+ lxe: xpath_class (--TC T2) 3.6111 msec/pass
+ lxe: xpath_class (--TC T3) 0.0591 msec/pass
+ lxe: xpath_class (--TC T4) 0.1979 msec/pass
Note that this still allows you to use variables in the expression, so you can
parse it once and then adapt it through variables at call time. In other
cases, where you have a fixed Element or ElementTree and want to run different
expressions on it, you should consider the ``XPathEvaluator``::
- lxe: xpath_element (--TR T1) 0.4089 msec/pass
- lxe: xpath_element (--TR T2) 5.9960 msec/pass
- lxe: xpath_element (--TR T3) 0.1230 msec/pass
- lxe: xpath_element (--TR T4) 0.3440 msec/pass
+ lxe: xpath_element (--TR T1) 0.4342 msec/pass
+ lxe: xpath_element (--TR T2) 11.9958 msec/pass
+ lxe: xpath_element (--TR T3) 0.1690 msec/pass
+ lxe: xpath_element (--TR T4) 0.3510 msec/pass
While it looks slightly slower, creating an XPath object for each of the
expressions generates a much higher overhead here::
- lxe: xpath_class_repeat (--TC T1) 1.0259 msec/pass
- lxe: xpath_class_repeat (--TC T2) 20.4861 msec/pass
- lxe: xpath_class_repeat (--TC T3) 0.1280 msec/pass
- lxe: xpath_class_repeat (--TC T4) 1.0269 msec/pass
+ lxe: xpath_class_repeat (--TC T1) 1.7619 msec/pass
+ lxe: xpath_class_repeat (--TC T2) 21.9102 msec/pass
+ lxe: xpath_class_repeat (--TC T3) 0.1330 msec/pass
+ lxe: xpath_class_repeat (--TC T4) 1.0631 msec/pass
A longer example
================
-A while ago, Uche Ogbuji posted a `benchmark proposal`_ that would read in a
-3MB XML version of the `Old Testament`_ of the Bible and look for the word
-*begat* in all verses. Apparently, it is contained in 120 out of almost 24000
-verses. This is easy to implement in ElementTree using ``findall()``.
-However, the fastest way to do this is obviously ``iterparse()``, as most of
-the data is not of any interest.
+... based on lxml 1.3.
+
+A while ago, Uche Ogbuji posted a `benchmark proposal`_ that would
+read in a 3MB XML version of the `Old Testament`_ of the Bible and
+look for the word *begat* in all verses. Apparently, it is contained
+in 120 out of almost 24000 verses. This is easy to implement in
+ElementTree using ``findall()``. However, the fastest and most memory
+friendly way to do this is obviously ``iterparse()``, as most of the
+data is not of any interest.
.. _`benchmark proposal`: http://www.onlamp.com/pub/wlg/6291
.. _`Old Testament`: http://www.ibiblio.org/bosak/xml/eg/religion.2.00.xml.zip
@@ -571,21 +603,21 @@
tree. It avoids step-by-step Python element instantiations along the path,
which can substantially improve the access time::
- lxe: attribute (--TR T1) 10.6189 msec/pass
- lxe: attribute (--TR T2) 53.7431 msec/pass
- lxe: attribute (--TR T4) 10.3359 msec/pass
-
- lxe: objectpath (--TR T1) 5.8351 msec/pass
- lxe: objectpath (--TR T2) 48.1579 msec/pass
- lxe: objectpath (--TR T4) 5.6930 msec/pass
-
- lxe: attributes_deep (--TR T1) 58.7430 msec/pass
- lxe: attributes_deep (--TR T2) 63.0901 msec/pass
- lxe: attributes_deep (--TR T4) 17.4620 msec/pass
-
- lxe: objectpath_deep (--TR T1) 52.1719 msec/pass
- lxe: objectpath_deep (--TR T2) 52.9201 msec/pass
- lxe: objectpath_deep (--TR T4) 7.5650 msec/pass
+ lxe: attribute (--TR T1) 9.8128 msec/pass
+ lxe: attribute (--TR T2) 53.2899 msec/pass
+ lxe: attribute (--TR T4) 9.6800 msec/pass
+
+ lxe: objectpath (--TR T1) 5.4898 msec/pass
+ lxe: objectpath (--TR T2) 48.4819 msec/pass
+ lxe: objectpath (--TR T4) 5.3761 msec/pass
+
+ lxe: attributes_deep (--TR T1) 56.3290 msec/pass
+ lxe: attributes_deep (--TR T2) 62.4361 msec/pass
+ lxe: attributes_deep (--TR T4) 15.8000 msec/pass
+
+ lxe: objectpath_deep (--TR T1) 49.0060 msec/pass
+ lxe: objectpath_deep (--TR T2) 52.5169 msec/pass
+ lxe: objectpath_deep (--TR T4) 7.1371 msec/pass
Note, however, that parsing ObjectPath expressions is not for free either, so
this is most effective for frequently accessing the same element.
@@ -611,17 +643,17 @@
subtrees and elements) to cache, you can trade memory usage against access
speed::
- lxe: attribute_cached (--TR T1) 7.9739 msec/pass
- lxe: attribute_cached (--TR T2) 50.9331 msec/pass
- lxe: attribute_cached (--TR T4) 7.8540 msec/pass
-
- lxe: attributes_deep_cached (--TR T1) 51.1391 msec/pass
- lxe: attributes_deep_cached (--TR T2) 55.7129 msec/pass
- lxe: attributes_deep_cached (--TR T4) 10.7968 msec/pass
-
- lxe: objectpath_deep_cached (--TR T1) 47.6151 msec/pass
- lxe: objectpath_deep_cached (--TR T2) 48.0802 msec/pass
- lxe: objectpath_deep_cached (--TR T4) 4.0281 msec/pass
+ lxe: attribute_cached (--TR T1) 7.6170 msec/pass
+ lxe: attribute_cached (--TR T2) 50.7941 msec/pass
+ lxe: attribute_cached (--TR T4) 7.4880 msec/pass
+
+ lxe: attributes_deep_cached (--TR T1) 49.9220 msec/pass
+ lxe: attributes_deep_cached (--TR T2) 55.9340 msec/pass
+ lxe: attributes_deep_cached (--TR T4) 10.0131 msec/pass
+
+ lxe: objectpath_deep_cached (--TR T1) 44.9121 msec/pass
+ lxe: objectpath_deep_cached (--TR T2) 48.2371 msec/pass
+ lxe: objectpath_deep_cached (--TR T4) 3.9630 msec/pass
Things to note: you cannot currently use ``weakref.WeakKeyDictionary`` objects
for this as lxml's element objects do not support weak references (which are
From scoder at codespeak.net Thu Dec 20 18:32:33 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Thu, 20 Dec 2007 18:32:33 +0100 (CET)
Subject: [Lxml-checkins] r49957 - in lxml/trunk: . src/lxml
Message-ID: <20071220173233.7F52C169EB3@codespeak.net>
Author: scoder
Date: Thu Dec 20 18:32:32 2007
New Revision: 49957
Modified:
lxml/trunk/ (props changed)
lxml/trunk/src/lxml/classlookup.pxi
lxml/trunk/src/lxml/parser.pxi
Log:
r3165 at delle: sbehnel | 2007-12-20 17:57:48 +0100
deprecation in docs
Modified: lxml/trunk/src/lxml/classlookup.pxi
==============================================================================
--- lxml/trunk/src/lxml/classlookup.pxi (original)
+++ lxml/trunk/src/lxml/classlookup.pxi Thu Dec 20 18:32:32 2007
@@ -299,7 +299,7 @@
LOOKUP_ELEMENT_CLASS = function
def setElementClassLookup(ElementClassLookup lookup = None):
- "Deprecated, use ``set_element_class_lookup(lookup)`` instead"
+ "@deprecated: use ``set_element_class_lookup(lookup)`` instead"
set_element_class_lookup(lookup)
def set_element_class_lookup(ElementClassLookup lookup = None):
@@ -313,4 +313,4 @@
cdef ParserBasedElementClassLookup DEFAULT_ELEMENT_CLASS_LOOKUP
DEFAULT_ELEMENT_CLASS_LOOKUP = ParserBasedElementClassLookup()
-setElementClassLookup(DEFAULT_ELEMENT_CLASS_LOOKUP)
+set_element_class_lookup(DEFAULT_ELEMENT_CLASS_LOOKUP)
Modified: lxml/trunk/src/lxml/parser.pxi
==============================================================================
--- lxml/trunk/src/lxml/parser.pxi (original)
+++ lxml/trunk/src/lxml/parser.pxi Thu Dec 20 18:32:32 2007
@@ -658,7 +658,7 @@
return "libxml2 %d.%d.%d" % LIBXML_VERSION
def setElementClassLookup(self, ElementClassLookup lookup = None):
- "Deprecated, use ``parser.set_element_class_lookup(lookup)`` instead."
+ "@deprecated: use ``parser.set_element_class_lookup(lookup)`` instead."
self.set_element_class_lookup(lookup)
def set_element_class_lookup(self, ElementClassLookup lookup = None):
@@ -1510,11 +1510,11 @@
__GLOBAL_PARSER_CONTEXT.setDefaultParser(__DEFAULT_XML_PARSER)
def setDefaultParser(parser=None):
- "Deprecated, please use set_default_parser instead."
+ "@deprecated: please use set_default_parser instead."
set_default_parser(parser)
def getDefaultParser():
- "Deprecated, please use get_default_parser instead."
+ "@deprecated: please use get_default_parser instead."
return get_default_parser()
def set_default_parser(_BaseParser parser=None):
From scoder at codespeak.net Thu Dec 20 18:32:36 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Thu, 20 Dec 2007 18:32:36 +0100 (CET)
Subject: [Lxml-checkins] r49958 - in lxml/trunk: . doc src/lxml
Message-ID: <20071220173236.891B0169EB5@codespeak.net>
Author: scoder
Date: Thu Dec 20 18:32:36 2007
New Revision: 49958
Modified:
lxml/trunk/ (props changed)
lxml/trunk/doc/performance.txt
lxml/trunk/src/lxml/classlookup.pxi
Log:
r3166 at delle: sbehnel | 2007-12-20 18:32:27 +0100
tiny speedup in element instantiation
Modified: lxml/trunk/doc/performance.txt
==============================================================================
--- lxml/trunk/doc/performance.txt (original)
+++ lxml/trunk/doc/performance.txt Thu Dec 20 18:32:36 2007
@@ -203,7 +203,7 @@
T1: 0.0914 0.0875 0.0872 0.0892 0.0882 0.0900
T2: 0.0894 0.0897 0.0892 0.0988 0.0978 0.0974
T3: 0.0219 0.0194 0.0189 0.0570 0.0570 0.0573
- T4: 0.0004 0.0004 0.0004 0.0012 0.0013 0.0012
+ T4: 0.0004 0.0003 0.0003 0.0012 0.0012 0.0012
cET: -- S- U- -A SA UA
T1: 0.0272 0.0264 0.0267 0.0268 0.0261 0.0265
T2: 0.0280 0.0274 0.0273 0.0273 0.0276 0.0275
@@ -367,19 +367,19 @@
especially if few elements are of interest or the target element tag name is
known, lxml is a good choice::
- lxe: getiterator_all (--TR T1) 6.0360 msec/pass
+ lxe: getiterator_all (--TR T1) 5.8582 msec/pass
cET: getiterator_all (--TR T1) 39.9489 msec/pass
ET : getiterator_all (--TR T1) 23.0000 msec/pass
- lxe: getiterator_islice (--TR T2) 0.0851 msec/pass
+ lxe: getiterator_islice (--TR T2) 0.0780 msec/pass
cET: getiterator_islice (--TR T2) 0.3440 msec/pass
ET : getiterator_islice (--TR T2) 0.2429 msec/pass
- lxe: getiterator_tag (--TR T2) 0.3290 msec/pass
+ lxe: getiterator_tag (--TR T2) 0.3119 msec/pass
cET: getiterator_tag (--TR T2) 14.1001 msec/pass
ET : getiterator_tag (--TR T2) 7.4241 msec/pass
- lxe: getiterator_tag_all (--TR T2) 0.7281 msec/pass
+ lxe: getiterator_tag_all (--TR T2) 0.6540 msec/pass
cET: getiterator_tag_all (--TR T2) 40.7901 msec/pass
ET : getiterator_tag_all (--TR T2) 21.0390 msec/pass
Modified: lxml/trunk/src/lxml/classlookup.pxi
==============================================================================
--- lxml/trunk/src/lxml/classlookup.pxi (original)
+++ lxml/trunk/src/lxml/classlookup.pxi Thu Dec 20 18:32:36 2007
@@ -102,49 +102,50 @@
def __init__(self, element=None, comment=None, pi=None, entity=None):
self._lookup_function = _lookupDefaultElementClass
if element is None:
- self.element_class = None
+ self.element_class = _Element
elif issubclass(element, ElementBase):
self.element_class = element
else:
raise TypeError, "element class must be subclass of ElementBase"
if comment is None:
- self.comment_class = None
+ self.comment_class = _Comment
elif issubclass(comment, CommentBase):
self.comment_class = comment
else:
raise TypeError, "comment class must be subclass of CommentBase"
- if pi is None:
- self.pi_class = None
- elif issubclass(pi, PIBase):
- self.pi_class = pi
- else:
- raise TypeError, "PI class must be subclass of PIBase"
-
if entity is None:
- self.entity_class = None
+ self.entity_class = _Entity
elif issubclass(entity, EntityBase):
self.entity_class = entity
else:
raise TypeError, "Entity class must be subclass of EntityBase"
+ if pi is None:
+ self.pi_class = None # special case, see below
+ elif issubclass(pi, PIBase):
+ self.pi_class = pi
+ else:
+ raise TypeError, "PI class must be subclass of PIBase"
+
cdef object _lookupDefaultElementClass(state, _Document _doc, xmlNode* c_node):
"Trivial class lookup function that always returns the default class."
if c_node.type == tree.XML_ELEMENT_NODE:
if state is not None:
- cls = (state).element_class
- if cls is None:
- return _Element
+ return (state).element_class
else:
- return cls
+ return _Element
elif c_node.type == tree.XML_COMMENT_NODE:
if state is not None:
- cls = (state).comment_class
- if cls is None:
+ return (state).comment_class
+ else:
return _Comment
+ elif c_node.type == tree.XML_ENTITY_REF_NODE:
+ if state is not None:
+ return (state).entity_class
else:
- return cls
+ return _Entity
elif c_node.type == tree.XML_PI_NODE:
if state is not None:
cls = (state).pi_class
@@ -158,13 +159,6 @@
return _ProcessingInstruction
else:
return cls
- elif c_node.type == tree.XML_ENTITY_REF_NODE:
- if state is not None:
- cls = (state).entity_class
- if cls is None:
- return _Entity
- else:
- return cls
else:
assert 0, "Unknown node type: %s" % c_node.type
@@ -220,12 +214,10 @@
self._lookup_function = _parser_class_lookup
cdef object _parser_class_lookup(state, _Document doc, xmlNode* c_node):
- cdef FallbackElementClassLookup lookup
- lookup = state
if doc._parser._class_lookup is not None:
return doc._parser._class_lookup._lookup_function(
doc._parser._class_lookup, doc, c_node)
- return lookup._callFallback(doc, c_node)
+ return (<FallbackElementClassLookup>state)._callFallback(doc, c_node)
cdef class CustomElementClassLookup(FallbackElementClassLookup):
From scoder at codespeak.net Thu Dec 20 18:48:07 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Thu, 20 Dec 2007 18:48:07 +0100 (CET)
Subject: [Lxml-checkins] r49961 - in lxml/trunk: . doc
Message-ID: <20071220174807.AD861169E8A@codespeak.net>
Author: scoder
Date: Thu Dec 20 18:48:07 2007
New Revision: 49961
Modified:
lxml/trunk/ (props changed)
lxml/trunk/doc/performance.txt
Log:
r3169 at delle: sbehnel | 2007-12-20 18:48:02 +0100
benchmark updates
Modified: lxml/trunk/doc/performance.txt
==============================================================================
--- lxml/trunk/doc/performance.txt (original)
+++ lxml/trunk/doc/performance.txt Thu Dec 20 18:48:07 2007
@@ -240,11 +240,11 @@
This handicap is also visible when accessing single children::
- lxe: first_child (--TR T2) 0.3228 msec/pass
+ lxe: first_child (--TR T2) 0.2470 msec/pass
cET: first_child (--TR T2) 0.2170 msec/pass
ET : first_child (--TR T2) 0.9968 msec/pass
- lxe: last_child (--TR T1) 0.3269 msec/pass
+ lxe: last_child (--TR T1) 0.2482 msec/pass
cET: last_child (--TR T1) 0.2291 msec/pass
ET : last_child (--TR T1) 0.9830 msec/pass
@@ -253,11 +253,11 @@
The data structure used by libxml2 is a linked tree, and thus, a
linked list of children::
- lxe: middle_child (--TR T1) 0.3638 msec/pass
+ lxe: middle_child (--TR T1) 0.2789 msec/pass
cET: middle_child (--TR T1) 0.2229 msec/pass
ET : middle_child (--TR T1) 1.0030 msec/pass
- lxe: middle_child (--TR T2) 2.1780 msec/pass
+ lxe: middle_child (--TR T2) 1.9610 msec/pass
cET: middle_child (--TR T2) 0.2229 msec/pass
ET : middle_child (--TR T2) 0.9930 msec/pass
@@ -277,11 +277,11 @@
are supposed to end up in, either as SubElements of an Element or using the
explicit ``Element.makeelement()`` call::
- lxe: makeelement (--TC T2) 2.2941 msec/pass
+ lxe: makeelement (--TC T2) 2.2650 msec/pass
cET: makeelement (--TC T2) 0.3211 msec/pass
ET : makeelement (--TC T2) 1.6358 msec/pass
- lxe: create_subelements (--TC T2) 2.1169 msec/pass
+ lxe: create_subelements (--TC T2) 1.9531 msec/pass
cET: create_subelements (--TC T2) 0.2351 msec/pass
ET : create_subelements (--TC T2) 3.2270 msec/pass
@@ -323,7 +323,7 @@
or replacing the child slice by a newly created element::
- lxe: replace_children_element (--TC T1) 0.2520 msec/pass
+ lxe: replace_children_element (--TC T1) 0.2480 msec/pass
cET: replace_children_element (--TC T1) 0.0238 msec/pass
ET : replace_children_element (--TC T1) 0.1600 msec/pass
From scoder at codespeak.net Thu Dec 20 19:25:38 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Thu, 20 Dec 2007 19:25:38 +0100 (CET)
Subject: [Lxml-checkins] r49963 - in lxml/trunk: . doc
Message-ID: <20071220182538.6563B169EA9@codespeak.net>
Author: scoder
Date: Thu Dec 20 19:25:36 2007
New Revision: 49963
Modified:
lxml/trunk/ (props changed)
lxml/trunk/doc/performance.txt
Log:
r3171 at delle: sbehnel | 2007-12-20 19:25:24 +0100
benchmark updates
Modified: lxml/trunk/doc/performance.txt
==============================================================================
--- lxml/trunk/doc/performance.txt (original)
+++ lxml/trunk/doc/performance.txt Thu Dec 20 19:25:36 2007
@@ -385,19 +385,19 @@
This translates directly into similar timings for ``Element.findall()``::
- lxe: findall (--TR T2) 8.2440 msec/pass
+ lxe: findall (--TR T2) 8.1239 msec/pass
cET: findall (--TR T2) 44.5340 msec/pass
ET : findall (--TR T2) 27.1149 msec/pass
- lxe: findall (--TR T3) 1.7269 msec/pass
+ lxe: findall (--TR T3) 1.6870 msec/pass
cET: findall (--TR T3) 12.9611 msec/pass
ET : findall (--TR T3) 8.6131 msec/pass
- lxe: findall_tag (--TR T2) 0.8020 msec/pass
+ lxe: findall_tag (--TR T2) 0.7660 msec/pass
cET: findall_tag (--TR T2) 40.6358 msec/pass
ET : findall_tag (--TR T2) 21.4581 msec/pass
- lxe: findall_tag (--TR T3) 0.2341 msec/pass
+ lxe: findall_tag (--TR T3) 0.2160 msec/pass
cET: findall_tag (--TR T3) 9.6831 msec/pass
ET : findall_tag (--TR T3) 5.2109 msec/pass
From scoder at codespeak.net Thu Dec 20 19:35:43 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Thu, 20 Dec 2007 19:35:43 +0100 (CET)
Subject: [Lxml-checkins] r49964 - lxml/trunk
Message-ID: <20071220183543.7D4C6169EAD@codespeak.net>
Author: scoder
Date: Thu Dec 20 19:35:41 2007
New Revision: 49964
Modified:
lxml/trunk/ (props changed)
lxml/trunk/CHANGES.txt
Log:
r3173 at delle: sbehnel | 2007-12-20 19:33:03 +0100
changelog
Modified: lxml/trunk/CHANGES.txt
==============================================================================
--- lxml/trunk/CHANGES.txt (original)
+++ lxml/trunk/CHANGES.txt Thu Dec 20 19:35:41 2007
@@ -2,6 +2,21 @@
lxml changelog
==============
+Under development
+=================
+
+Features added
+--------------
+
+Bugs fixed
+----------
+
+Other changes
+-------------
+
+* Minor performance tweaks
+
+
2.0alpha6 (2007-12-19)
======================
From scoder at codespeak.net Thu Dec 20 19:35:46 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Thu, 20 Dec 2007 19:35:46 +0100 (CET)
Subject: [Lxml-checkins] r49965 - lxml/trunk
Message-ID: <20071220183546.ABE17169EB0@codespeak.net>
Author: scoder
Date: Thu Dec 20 19:35:46 2007
New Revision: 49965
Modified:
lxml/trunk/ (props changed)
lxml/trunk/version.txt
Log:
r3174 at delle: sbehnel | 2007-12-20 19:35:38 +0100
next release will be beta1
Modified: lxml/trunk/version.txt
==============================================================================
--- lxml/trunk/version.txt (original)
+++ lxml/trunk/version.txt Thu Dec 20 19:35:46 2007
@@ -1 +1 @@
-2.0alpha6
+2.0beta1
From scoder at codespeak.net Tue Dec 25 18:06:56 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Tue, 25 Dec 2007 18:06:56 +0100 (CET)
Subject: [Lxml-checkins] r50103 - in lxml/trunk: . doc
Message-ID: <20071225170656.5525F1684DA@codespeak.net>
Author: scoder
Date: Tue Dec 25 18:06:55 2007
New Revision: 50103
Modified:
lxml/trunk/ (props changed)
lxml/trunk/doc/lxml2.txt
Log:
r3177 at delle: sbehnel | 2007-12-22 16:10:24 +0100
doc update
Modified: lxml/trunk/doc/lxml2.txt
==============================================================================
--- lxml/trunk/doc/lxml2.txt (original)
+++ lxml/trunk/doc/lxml2.txt Tue Dec 25 18:06:55 2007
@@ -47,6 +47,18 @@
.. _`namespace implementation`: element_classes.html#implementing-namespaces
+* Some API functions now require passing options as keyword arguments,
+ as opposed to positional arguments. This restriction was introduced
+ to make the API usage independent of future extensions such as the
+ addition of new positional arguments. Users should not rely on the
+ position of an optional argument in function signatures and instead
+ pass it explicitly named. This also improves code readability - it
+ is common good practice to pass options in a consistent way
+ independent of their position, so many people may not even notice
+ the change in their code. Another important reason is compatibility
+ with cElementTree, which also enforces keyword-only arguments in a
+ couple of places.
+
* XPath now raises exceptions specific to the part of the execution that
failed: ``XPathSyntaxError`` for parser errors and ``XPathEvalError`` for
errors that occurred during the evaluation. Note that the distinction only
From scoder at codespeak.net Tue Dec 25 18:06:59 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Tue, 25 Dec 2007 18:06:59 +0100 (CET)
Subject: [Lxml-checkins] r50104 - in lxml/trunk: . src/lxml
Message-ID: <20071225170659.1BAB11684E0@codespeak.net>
Author: scoder
Date: Tue Dec 25 18:06:59 2007
New Revision: 50104
Modified:
lxml/trunk/ (props changed)
lxml/trunk/src/lxml/classlookup.pxi
Log:
r3178 at delle: sbehnel | 2007-12-25 14:57:47 +0100
moved code out of element instantiation fast path
Modified: lxml/trunk/src/lxml/classlookup.pxi
==============================================================================
--- lxml/trunk/src/lxml/classlookup.pxi (original)
+++ lxml/trunk/src/lxml/classlookup.pxi Tue Dec 25 18:06:59 2007
@@ -78,10 +78,10 @@
"""
self.fallback = lookup
self._fallback_function = lookup._lookup_function
-
- cdef object _callFallback(self, doc, xmlNode* c_node):
if self._fallback_function is NULL:
self._fallback_function = _lookupDefaultElementClass
+
+ cdef object _callFallback(self, _Document doc, xmlNode* c_node):
return self._fallback_function(self.fallback, doc, c_node)
From scoder at codespeak.net Tue Dec 25 18:07:04 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Tue, 25 Dec 2007 18:07:04 +0100 (CET)
Subject: [Lxml-checkins] r50105 - in lxml/trunk: . src/lxml
Message-ID: <20071225170704.9B63B1684E8@codespeak.net>
Author: scoder
Date: Tue Dec 25 18:07:04 2007
New Revision: 50105
Modified:
lxml/trunk/ (props changed)
lxml/trunk/CHANGES.txt
lxml/trunk/src/lxml/apihelpers.pxi
Log:
r3179 at delle: sbehnel | 2007-12-25 15:06:26 +0100
cleanup and optimisations in _makeSubElement()
Modified: lxml/trunk/CHANGES.txt
==============================================================================
--- lxml/trunk/CHANGES.txt (original)
+++ lxml/trunk/CHANGES.txt Tue Dec 25 18:07:04 2007
@@ -14,7 +14,8 @@
Other changes
-------------
-* Minor performance tweaks
+* Minor performance tweaks for Element instantiation and subelement
+ creation
2.0alpha6 (2007-12-19)
Modified: lxml/trunk/src/lxml/apihelpers.pxi
==============================================================================
--- lxml/trunk/src/lxml/apihelpers.pxi (original)
+++ lxml/trunk/src/lxml/apihelpers.pxi Tue Dec 25 18:07:04 2007
@@ -138,30 +138,15 @@
attrib, nsmap, extra_attrs):
"""Create a new child element and initialize text content, namespaces and
attributes.
-
- This helper function will reuse as much of the existing document as
- possible:
-
- If 'parser' is None, the parser will be inherited from 'doc' or the
- default parser will be used.
-
- If 'doc' is None, 'c_doc' is used to create a new _Document and the new
- element is made its root node.
-
- If 'c_doc' is also NULL, a new xmlDoc will be created.
"""
- cdef _BaseParser parser
- cdef _Document doc
cdef xmlNode* c_node
cdef xmlDoc* c_doc
if parent is None or parent._doc is None:
return None
ns_utf, name_utf = _getNsTag(tag)
- doc = parent._doc
- c_doc = doc._c_doc
+ c_doc = parent._doc._c_doc
- parser = doc._parser
- if parser is not None and parser._for_html:
+ if parent._doc._parser is not None and parent._doc._parser._for_html:
_htmlTagValidOrRaise(name_utf)
else:
_tagValidOrRaise(name_utf)
@@ -171,24 +156,15 @@
python.PyErr_NoMemory()
tree.xmlAddChild(parent._c_node, c_node)
- try:
- if text is not None:
- _setNodeText(c_node, text)
- if tail is not None:
- _setTailText(c_node, tail)
-
- # add namespaces to node if necessary
- doc._setNodeNamespaces(c_node, ns_utf, nsmap)
- _initNodeAttributes(c_node, doc, attrib, extra_attrs)
- return _elementFactory(doc, c_node)
- except:
- # free allocated c_node/c_doc unless Python does it for us
- if c_node.doc is not c_doc:
- # node not yet in document => will not be freed by document
- if tail is not None:
- _removeText(c_node.next) # tail
- tree.xmlFreeNode(c_node)
- raise
+ if text is not None:
+ _setNodeText(c_node, text)
+ if tail is not None:
+ _setTailText(c_node, tail)
+
+ # add namespaces to node if necessary
+ parent._doc._setNodeNamespaces(c_node, ns_utf, nsmap)
+ _initNodeAttributes(c_node, parent._doc, attrib, extra_attrs)
+ return _elementFactory(parent._doc, c_node)
cdef _initNodeAttributes(xmlNode* c_node, _Document doc, attrib, extra):
"""Initialise the attributes of an element node.
From scoder at codespeak.net Tue Dec 25 18:07:07 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Tue, 25 Dec 2007 18:07:07 +0100 (CET)
Subject: [Lxml-checkins] r50106 - in lxml/trunk: . src/lxml
Message-ID: <20071225170707.029A21684E8@codespeak.net>
Author: scoder
Date: Tue Dec 25 18:07:07 2007
New Revision: 50106
Modified:
lxml/trunk/ (props changed)
lxml/trunk/src/lxml/_elementpath.py
Log:
r3180 at delle: sbehnel | 2007-12-25 15:54:59 +0100
typo
Modified: lxml/trunk/src/lxml/_elementpath.py
==============================================================================
--- lxml/trunk/src/lxml/_elementpath.py (original)
+++ lxml/trunk/src/lxml/_elementpath.py Tue Dec 25 18:07:07 2007
@@ -125,7 +125,7 @@
if value[:1] == "'" or value[:1] == '"':
value = value[1:-1]
else:
- raise SyntaxError("invalid comparision target")
+ raise SyntaxError("invalid comparison target")
token = next()
def select(result):
for elem in result:
From scoder at codespeak.net Tue Dec 25 18:07:11 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Tue, 25 Dec 2007 18:07:11 +0100 (CET)
Subject: [Lxml-checkins] r50107 - in lxml/trunk: . src/lxml
Message-ID: <20071225170711.D5AA91684ED@codespeak.net>
Author: scoder
Date: Tue Dec 25 18:07:11 2007
New Revision: 50107
Modified:
lxml/trunk/ (props changed)
lxml/trunk/src/lxml/lxml.etree.pyx
Log:
r3181 at delle: sbehnel | 2007-12-25 16:02:46 +0100
cleanup
Modified: lxml/trunk/src/lxml/lxml.etree.pyx
==============================================================================
--- lxml/trunk/src/lxml/lxml.etree.pyx (original)
+++ lxml/trunk/src/lxml/lxml.etree.pyx Tue Dec 25 18:07:11 2007
@@ -1965,7 +1965,7 @@
(self._node_type != node._c_node.type or
not _tagMatches(node._c_node, self._href, self._name)):
# this cannot raise StopIteration, self._next_node != None
- self.next()
+ self.__next__()
def __iter__(self):
return self
From scoder at codespeak.net Fri Dec 28 13:25:27 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Fri, 28 Dec 2007 13:25:27 +0100 (CET)
Subject: [Lxml-checkins] r50159 - in lxml/trunk: . src/lxml src/lxml/tests
Message-ID: <20071228122527.8FE52168480@codespeak.net>
Author: scoder
Date: Fri Dec 28 13:25:27 2007
New Revision: 50159
Modified:
lxml/trunk/ (props changed)
lxml/trunk/CHANGES.txt
lxml/trunk/src/lxml/lxml.etree.pyx
lxml/trunk/src/lxml/tests/test_etree.py
Log:
r3187 at delle: sbehnel | 2007-12-28 13:23:37 +0100
make 'entity.text' return the textual representation, such as &eacute;
Modified: lxml/trunk/CHANGES.txt
==============================================================================
--- lxml/trunk/CHANGES.txt (original)
+++ lxml/trunk/CHANGES.txt Fri Dec 28 13:25:27 2007
@@ -8,6 +8,9 @@
Features added
--------------
+* ``entity.text`` now returns the textual representation of the
+ entity, e.g. ``&amp;``.
+
Bugs fixed
----------
Modified: lxml/trunk/src/lxml/lxml.etree.pyx
==============================================================================
--- lxml/trunk/src/lxml/lxml.etree.pyx (original)
+++ lxml/trunk/src/lxml/lxml.etree.pyx Fri Dec 28 13:25:27 2007
@@ -1352,9 +1352,17 @@
def __set__(self, value):
value = _utf8(value)
+ assert '&' not in value and ';' not in value, \
+ "Invalid entity name '%s'" % value
c_text = _cstr(value)
tree.xmlNodeSetName(self._c_node, c_text)
+ property text:
+ # FIXME: should this be None or '&[VALUE];' or the resolved
+ # entity value ?
+ def __get__(self):
+ return '&%s;' % funicode(self._c_node.name)
+
def __repr__(self):
return "&%s;" % self.name
@@ -1940,10 +1948,10 @@
first pre-order). Note that this also includes comments, entities and
processing instructions. To filter them out, check if the ``tag``
property of the returned element is a string (i.e. not None and not a
- factory function).
+ factory function), or pass the ``Element`` factory for the ``tag`` keyword.
- If the optional 'tag' argument is not None, the iterator returns only the
- elements that match the respective name and namespace.
+ If the optional ``tag`` argument is not None, the iterator returns only
+ the elements that match the respective name and namespace.
The optional boolean argument 'inclusive' defaults to True and can be set
to False to exclude the start element itself.
Modified: lxml/trunk/src/lxml/tests/test_etree.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_etree.py (original)
+++ lxml/trunk/src/lxml/tests/test_etree.py Fri Dec 28 13:25:27 2007
@@ -582,7 +582,7 @@
tree = parse(StringIO(xml), parser)
root = tree.getroot()
self.assertEquals(root[0].tag, Entity)
- self.assertFalse(root[0].text)
+ self.assertEquals(root[0].text, "&myentity;")
self.assertEquals(root[0].tail, None)
self.assertEquals(root[0].name, "myentity")
@@ -598,7 +598,7 @@
root.append( Entity("test") )
self.assertEquals(root[0].tag, Entity)
- self.assertFalse(root[0].text)
+ self.assertEquals(root[0].text, "&test;")
self.assertEquals(root[0].tail, None)
self.assertEquals(root[0].name, "test")
From scoder at codespeak.net Fri Dec 28 13:25:31 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Fri, 28 Dec 2007 13:25:31 +0100 (CET)
Subject: [Lxml-checkins] r50160 - in lxml/trunk: . doc
Message-ID: <20071228122531.0C743168487@codespeak.net>
Author: scoder
Date: Fri Dec 28 13:25:30 2007
New Revision: 50160
Modified:
lxml/trunk/ (props changed)
lxml/trunk/doc/tutorial.txt
Log:
r3188 at delle: sbehnel | 2007-12-28 13:25:12 +0100
tutorial: show how to restrict iteration to Element objects
Modified: lxml/trunk/doc/tutorial.txt
==============================================================================
--- lxml/trunk/doc/tutorial.txt (original)
+++ lxml/trunk/doc/tutorial.txt Fri Dec 28 13:25:30 2007
@@ -328,6 +328,37 @@
child - Child 1
child - Child 2
+By default, iteration yields all nodes in the tree, including
+ProcessingInstructions, Comments and Entity instances. If you want to
+make sure only Element objects are returned, you can pass the
+``Element`` factory as tag parameter::
+
+ >>> root.append(etree.Entity("#234"))
+ >>> root.append(etree.Comment("some comment"))
+
+ >>> for element in root.iter():
+ ... if isinstance(element.tag, basestring):
+ ... print element.tag, '-', element.text
+ ... else:
+ ... print 'SPECIAL:', element, '-', element.text
+ root - None
+ child - Child 1
+ child - Child 2
+ another - Child 3
+ SPECIAL: &#234; - &#234;
+ SPECIAL: <!--some comment--> - some comment
+
+ >>> for element in root.iter(tag=etree.Element):
+ ... print element.tag, '-', element.text
+ root - None
+ child - Child 1
+ child - Child 2
+ another - Child 3
+
+ >>> for element in root.iter(tag=etree.Entity):
+ ... print element.text
+ &#234;
+
In lxml.etree, elements provide `further iterators`_ for all directions in the
tree: children, parents (or rather ancestors) and siblings.
From scoder at codespeak.net Fri Dec 28 18:50:35 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Fri, 28 Dec 2007 18:50:35 +0100 (CET)
Subject: [Lxml-checkins] r50162 - in lxml/trunk: . src/lxml/tests
Message-ID: <20071228175035.52CA41684F6@codespeak.net>
Author: scoder
Date: Fri Dec 28 18:50:33 2007
New Revision: 50162
Modified:
lxml/trunk/ (props changed)
lxml/trunk/src/lxml/tests/common_imports.py
Log:
r3191 at delle: sbehnel | 2007-12-28 18:50:25 +0100
skip (c)ET compatibility tests for older library versions
Modified: lxml/trunk/src/lxml/tests/common_imports.py
==============================================================================
--- lxml/trunk/src/lxml/tests/common_imports.py (original)
+++ lxml/trunk/src/lxml/tests/common_imports.py Fri Dec 28 18:50:33 2007
@@ -13,6 +13,11 @@
except ImportError:
ElementTree = None
+if hasattr(ElementTree, 'VERSION'):
+ if tuple(ElementTree.VERSION.split('.')) < (1,3):
+ # compatibility tests require ET 1.3+
+ ElementTree = None
+
try:
import cElementTree # standard ET
except ImportError:
@@ -21,6 +26,11 @@
except ImportError:
cElementTree = None
+if hasattr(cElementTree, 'VERSION'):
+ if tuple(cElementTree.VERSION.split('.')) < (1,0,7):
+ # compatibility tests require cET 1.0.7+
+ cElementTree = None
+
try:
import doctest
# check if the system version has everything we need
From scoder at codespeak.net Sat Dec 29 17:14:32 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Sat, 29 Dec 2007 17:14:32 +0100 (CET)
Subject: [Lxml-checkins] r50172 - in lxml/trunk: . src/lxml
Message-ID: <20071229161432.88C5E1684CC@codespeak.net>
Author: scoder
Date: Sat Dec 29 17:14:31 2007
New Revision: 50172
Modified:
lxml/trunk/ (props changed)
lxml/trunk/src/lxml/xslt.pxi
Log:
r3193 at delle: sbehnel | 2007-12-29 15:39:03 +0100
cleanup, doc
Modified: lxml/trunk/src/lxml/xslt.pxi
==============================================================================
--- lxml/trunk/src/lxml/xslt.pxi (original)
+++ lxml/trunk/src/lxml/xslt.pxi Sat Dec 29 17:14:31 2007
@@ -343,7 +343,7 @@
def __copy__(self):
cdef XSLT new_xslt
cdef xmlDoc* c_doc
- new_xslt = NEW_XSLT(XSLT)
+ new_xslt = NEW_XSLT(XSLT) # without calling __init__()
new_xslt._access_control = self._access_control
new_xslt._error_log = _ErrorLog()
new_xslt._context = self._context._copy()
@@ -399,7 +399,7 @@
transform_ctxt._private = resolver_context
c_result = self._run_transform(
- input_doc, c_doc, _kw, context, transform_ctxt)
+ c_doc, _kw, context, transform_ctxt)
if transform_ctxt.profile:
c_profile_doc = xslt.xsltGetProfileInformation(transform_ctxt)
@@ -438,7 +438,7 @@
result_doc = _documentFactory(c_result, input_doc._parser)
return _xsltResultTreeFactory(result_doc, self, profile_doc)
- cdef xmlDoc* _run_transform(self, _Document input_doc, xmlDoc* c_input_doc,
+ cdef xmlDoc* _run_transform(self, xmlDoc* c_input_doc,
parameters, _XSLTContext context,
xslt.xsltTransformContext* transform_ctxt):
cdef xmlDoc* c_result
From scoder at codespeak.net Sat Dec 29 17:14:36 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Sat, 29 Dec 2007 17:14:36 +0100 (CET)
Subject: [Lxml-checkins] r50173 - in lxml/trunk: . src/lxml
Message-ID: <20071229161436.7ED4C1684D9@codespeak.net>
Author: scoder
Date: Sat Dec 29 17:14:35 2007
New Revision: 50173
Modified:
lxml/trunk/ (props changed)
lxml/trunk/src/lxml/etree_defs.h
Log:
r3194 at delle: sbehnel | 2007-12-29 16:52:23 +0100
make explicit that tuple creation in PY_NEW is an unlikely case
Modified: lxml/trunk/src/lxml/etree_defs.h
==============================================================================
--- lxml/trunk/src/lxml/etree_defs.h (original)
+++ lxml/trunk/src/lxml/etree_defs.h Sat Dec 29 17:14:35 2007
@@ -101,12 +101,23 @@
#define _cstr(s) PyString_AS_STRING(s)
#define _fqtypename(o) (((PyTypeObject*)o)->ob_type->tp_name)
+#ifdef __GNUC__
+/* Test for GCC > 2.95 */
+#if __GNUC__ > 2 || (__GNUC__ == 2 && (__GNUC_MINOR__ > 95))
+#define unlikely_condition(x) __builtin_expect((x), 0)
+#else /* __GNUC__ > 2 ... */
+#define unlikely_condition(x) (x)
+#endif /* __GNUC__ > 2 ... */
+#else /* __GNUC__ */
+#define unlikely_condition(x) (x)
+#endif /* __GNUC__ */
+
static PyObject* __PY_NEW_GLOBAL_EMPTY_TUPLE = NULL;
#define PY_NEW(T) \
(((PyTypeObject*)(T))->tp_new( \
(PyTypeObject*)(T), \
- ((__PY_NEW_GLOBAL_EMPTY_TUPLE == NULL) ? \
+ (unlikely_condition(__PY_NEW_GLOBAL_EMPTY_TUPLE == NULL) ? \
(__PY_NEW_GLOBAL_EMPTY_TUPLE = PyTuple_New(0)) : \
(__PY_NEW_GLOBAL_EMPTY_TUPLE)), \
NULL))
From scoder at codespeak.net Sat Dec 29 17:14:39 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Sat, 29 Dec 2007 17:14:39 +0100 (CET)
Subject: [Lxml-checkins] r50174 - in lxml/trunk: . benchmark
Message-ID: <20071229161439.DEDBA1684FC@codespeak.net>
Author: scoder
Date: Sat Dec 29 17:14:39 2007
New Revision: 50174
Modified:
lxml/trunk/ (props changed)
lxml/trunk/benchmark/bench_etree.py
Log:
r3195 at delle: sbehnel | 2007-12-29 17:14:16 +0100
benchmark for result-free tree iteration
Modified: lxml/trunk/benchmark/bench_etree.py
==============================================================================
--- lxml/trunk/benchmark/bench_etree.py (original)
+++ lxml/trunk/benchmark/bench_etree.py Sat Dec 29 17:14:39 2007
@@ -293,6 +293,9 @@
def bench_getiterator_tag_all(self, root):
list(root.getiterator(self.SEARCH_TAG))
+ def bench_getiterator_tag_none(self, root):
+ list(root.getiterator("{ThisShould}NeverExist"))
+
def bench_getiterator_tag_text(self, root):
[ e.text for e in root.getiterator(self.SEARCH_TAG) ]
From scoder at codespeak.net Sat Dec 29 20:10:39 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Sat, 29 Dec 2007 20:10:39 +0100 (CET)
Subject: [Lxml-checkins] r50179 - in lxml/trunk: . src/lxml
Message-ID: <20071229191039.81EC3168550@codespeak.net>
Author: scoder
Date: Sat Dec 29 20:10:39 2007
New Revision: 50179
Modified:
lxml/trunk/ (props changed)
lxml/trunk/src/lxml/xslt.pxi
Log:
r3199 at delle: sbehnel | 2007-12-29 20:09:02 +0100
cleanup
Modified: lxml/trunk/src/lxml/xslt.pxi
==============================================================================
--- lxml/trunk/src/lxml/xslt.pxi (original)
+++ lxml/trunk/src/lxml/xslt.pxi Sat Dec 29 20:10:39 2007
@@ -461,15 +461,15 @@
try:
i = 0
keep_ref = []
- for key, value in parameters.iteritems():
+ for key, value in parameters.items():
k = _utf8(key)
python.PyList_Append(keep_ref, k)
v = _utf8(value)
python.PyList_Append(keep_ref, v)
params[i] = _cstr(k)
- i = i + 1
+ i += 1
params[i] = _cstr(v)
- i = i + 1
+ i += 1
except:
python.PyMem_Free(params)
raise
From scoder at codespeak.net Sat Dec 29 20:10:43 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Sat, 29 Dec 2007 20:10:43 +0100 (CET)
Subject: [Lxml-checkins] r50180 - in lxml/trunk: . src/lxml
Message-ID: <20071229191043.2F095168551@codespeak.net>
Author: scoder
Date: Sat Dec 29 20:10:42 2007
New Revision: 50180
Modified:
lxml/trunk/ (props changed)
lxml/trunk/src/lxml/etree_defs.h
Log:
r3200 at delle: sbehnel | 2007-12-29 20:10:29 +0100
compilation without threading support adapted to code generated for 'with nogil' block
Modified: lxml/trunk/src/lxml/etree_defs.h
==============================================================================
--- lxml/trunk/src/lxml/etree_defs.h (original)
+++ lxml/trunk/src/lxml/etree_defs.h Sat Dec 29 20:10:42 2007
@@ -28,6 +28,10 @@
# define PyEval_RestoreThread(state)
# define PyGILState_Ensure() (PyGILState_UNLOCKED)
# define PyGILState_Release(state)
+# undef Py_UNBLOCK_THREADS
+# define Py_UNBLOCK_THREADS
+# undef Py_BLOCK_THREADS
+# define Py_BLOCK_THREADS
#endif
#ifdef WITHOUT_THREADING