From scoder at codespeak.net Sun Dec 2 11:32:01 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sun, 2 Dec 2007 11:32:01 +0100 (CET) Subject: [Lxml-checkins] r49263 - lxml/trunk Message-ID: <20071202103201.7407F813C@code0.codespeak.net> Author: scoder Date: Sun Dec 2 11:32:00 2007 New Revision: 49263 Modified: lxml/trunk/TODO.txt Log: cleanup Modified: lxml/trunk/TODO.txt ============================================================================== --- lxml/trunk/TODO.txt (original) +++ lxml/trunk/TODO.txt Sun Dec 2 11:32:00 2007 @@ -22,12 +22,6 @@ e.g. missing namespace mappings in XPath -ElementTree ------------ - -* _setroot(), even though this is not strictly a public method. - - QName ----- From scoder at codespeak.net Sun Dec 2 11:43:22 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sun, 2 Dec 2007 11:43:22 +0100 (CET) Subject: [Lxml-checkins] r49264 - lxml/trunk/doc Message-ID: <20071202104322.86A5F8142@code0.codespeak.net> Author: scoder Date: Sun Dec 2 11:43:22 2007 New Revision: 49264 Modified: lxml/trunk/doc/performance.txt Log: doc cleanup Modified: lxml/trunk/doc/performance.txt ============================================================================== --- lxml/trunk/doc/performance.txt (original) +++ lxml/trunk/doc/performance.txt Sun Dec 2 11:43:22 2007 @@ -598,7 +598,7 @@ the Python objects, thus trading memory for speed. 
Just create a cache dictionary and run:: - cache[root] = list(root.getiterator()) + cache[root] = list(root.iter()) after parsing and:: From scoder at codespeak.net Sun Dec 2 13:30:07 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sun, 2 Dec 2007 13:30:07 +0100 (CET) Subject: [Lxml-checkins] r49273 - lxml/trunk/src/lxml Message-ID: <20071202123007.7B6A380C9@code0.codespeak.net> Author: scoder Date: Sun Dec 2 13:30:07 2007 New Revision: 49273 Modified: lxml/trunk/src/lxml/cstd.pxd lxml/trunk/src/lxml/lxml.etree.pyx lxml/trunk/src/lxml/parser.pxi lxml/trunk/src/lxml/python.pxd lxml/trunk/src/lxml/xmlerror.pxi lxml/trunk/src/lxml/xpath.pxi lxml/trunk/src/lxml/xslt.pxi Log: use 'with gil/nogil' where appropriate instead of acquiring/releasing the GIL by hand Modified: lxml/trunk/src/lxml/cstd.pxd ============================================================================== --- lxml/trunk/src/lxml/cstd.pxd (original) +++ lxml/trunk/src/lxml/cstd.pxd Sun Dec 2 13:30:07 2007 @@ -15,6 +15,10 @@ cdef void* memcpy(void* dest, void* src, size_t len) cdef void* memset(void* s, int c, size_t len) +cdef extern from "stdlib.h": + cdef void* malloc(size_t size) + cdef void free(void* ptr) + cdef extern from "stdarg.h": ctypedef void *va_list void va_start(va_list ap, void *last) Modified: lxml/trunk/src/lxml/lxml.etree.pyx ============================================================================== --- lxml/trunk/src/lxml/lxml.etree.pyx (original) +++ lxml/trunk/src/lxml/lxml.etree.pyx Sun Dec 2 13:30:07 2007 @@ -1181,7 +1181,6 @@ cdef _Element NEW_ELEMENT "PY_NEW" (object t) cdef _Element _elementFactory(_Document doc, xmlNode* c_node): - cdef python.PyThreadState* state cdef _Element result result = getProxy(c_node) if result is not None: @@ -1190,9 +1189,9 @@ return None if config.ENABLE_THREADING: - state = python.PyEval_SaveThread() - python.PyThread_acquire_lock(ELEMENT_CREATION_LOCK, python.WAIT_LOCK) - python.PyEval_RestoreThread(state) + with nogil: 
+ python.PyThread_acquire_lock( + ELEMENT_CREATION_LOCK, python.WAIT_LOCK) result = getProxy(c_node) if result is not None: python.PyThread_release_lock(ELEMENT_CREATION_LOCK) Modified: lxml/trunk/src/lxml/parser.pxi ============================================================================== --- lxml/trunk/src/lxml/parser.pxi (original) +++ lxml/trunk/src/lxml/parser.pxi Sun Dec 2 13:30:07 2007 @@ -409,13 +409,11 @@ xmlparser.xmlClearParserCtxt(self._c_ctxt) cdef int prepare(self) except -1: - cdef python.PyThreadState* state cdef int result if config.ENABLE_THREADING and self._lock is not NULL: - state = python.PyEval_SaveThread() - result = python.PyThread_acquire_lock( - self._lock, python.WAIT_LOCK) - python.PyEval_RestoreThread(state) + with nogil: + result = python.PyThread_acquire_lock( + self._lock, python.WAIT_LOCK) if result == 0: raise ParserError, "parser locking failed" self._error_log.connect() Modified: lxml/trunk/src/lxml/python.pxd ============================================================================== --- lxml/trunk/src/lxml/python.pxd (original) +++ lxml/trunk/src/lxml/python.pxd Sun Dec 2 13:30:07 2007 @@ -95,12 +95,6 @@ cdef object PyErr_NoMemory() cdef object PyErr_SetFromErrno(object type) - ctypedef enum PyGILState_STATE: - PyGILState_LOCKED - PyGILState_UNLOCKED - - cdef PyGILState_STATE PyGILState_Ensure() - cdef void PyGILState_Release(PyGILState_STATE state) cdef PyThreadState* PyEval_SaveThread() cdef void PyEval_RestoreThread(PyThreadState* state) cdef PyObject* PyThreadState_GetDict() @@ -109,7 +103,7 @@ ctypedef void* PyThread_type_lock cdef PyThread_type_lock PyThread_allocate_lock() cdef void PyThread_free_lock(PyThread_type_lock lock) - cdef int PyThread_acquire_lock(PyThread_type_lock lock, int mode) + cdef int PyThread_acquire_lock(PyThread_type_lock lock, int mode) nogil cdef void PyThread_release_lock(PyThread_type_lock lock) cdef long PyThread_get_thread_ident() Modified: lxml/trunk/src/lxml/xmlerror.pxi 
============================================================================== --- lxml/trunk/src/lxml/xmlerror.pxi (original) +++ lxml/trunk/src/lxml/xmlerror.pxi Sun Dec 2 13:30:07 2007 @@ -370,7 +370,7 @@ # local log functions: forward error to logger object -cdef void _forwardError(void* c_log_handler, xmlerror.xmlError* error): +cdef void _forwardError(void* c_log_handler, xmlerror.xmlError* error) with gil: cdef _BaseErrorLog log_handler if c_log_handler is not NULL: log_handler = <_BaseErrorLog>c_log_handler @@ -378,19 +378,15 @@ log_handler = __GLOBAL_ERROR_LOG log_handler._receive(error) -cdef void _receiveError(void* c_log_handler, xmlerror.xmlError* error): +cdef void _receiveError(void* c_log_handler, xmlerror.xmlError* error) nogil: # no Python objects here, may be called without thread context ! # when we declare a Python object, Pyrex will INCREF(None) ! - cdef python.PyGILState_STATE gil_state if __DEBUG != 0: - gil_state = python.PyGILState_Ensure() _forwardError(c_log_handler, error) - python.PyGILState_Release(gil_state) -cdef void _receiveXSLTError(void* c_log_handler, char* msg, ...): +cdef void _receiveXSLTError(void* c_log_handler, char* msg, ...) nogil: # no Python objects here, may be called without thread context ! # when we declare a Python object, Pyrex will INCREF(None) ! 
- cdef python.PyGILState_STATE gil_state cdef xmlerror.xmlError c_error cdef cstd.va_list args cdef char* c_text @@ -422,7 +418,6 @@ c_element = NULL cstd.va_end(args) - gil_state = python.PyGILState_Ensure() c_message = NULL if c_text is NULL: c_error.message = '' @@ -431,7 +426,7 @@ else: text_size = cstd.strlen(c_text) element_size = cstd.strlen(c_element) - c_message = python.PyMem_Malloc( + c_message = cstd.malloc( (text_size + 12 + element_size + 1) * sizeof(char)) cstd.sprintf(c_message, "%s, element '%s'", c_text, c_element) c_error.message = c_message @@ -444,8 +439,7 @@ _forwardError(c_log_handler, &c_error) if c_message is not NULL: - python.PyMem_Free(c_error.message) - python.PyGILState_Release(gil_state) + cstd.free(c_error.message) ################################################################################ Modified: lxml/trunk/src/lxml/xpath.pxi ============================================================================== --- lxml/trunk/src/lxml/xpath.pxi (original) +++ lxml/trunk/src/lxml/xpath.pxi Sun Dec 2 13:30:07 2007 @@ -148,13 +148,11 @@ return c == c'/' cdef int _lock(self) except -1: - cdef python.PyThreadState* state cdef int result if config.ENABLE_THREADING and self._eval_lock != NULL: - state = python.PyEval_SaveThread() - result = python.PyThread_acquire_lock( - self._eval_lock, python.WAIT_LOCK) - python.PyEval_RestoreThread(state) + with nogil: + result = python.PyThread_acquire_lock( + self._eval_lock, python.WAIT_LOCK) if result == 0: raise ParserError, "parser locking failed" return 0 Modified: lxml/trunk/src/lxml/xslt.pxi ============================================================================== --- lxml/trunk/src/lxml/xslt.pxi (original) +++ lxml/trunk/src/lxml/xslt.pxi Sun Dec 2 13:30:07 2007 @@ -64,16 +64,8 @@ context._parser = parser context._c_style_doc = NULL -cdef xmlDoc* _xslt_resolve_stylesheet(char* c_uri, void* context): - cdef xmlDoc* c_doc - c_doc = (<_XSLTResolverContext>context)._c_style_doc - if c_doc is 
not NULL and c_doc.URL is not NULL: - if cstd.strcmp(c_uri, c_doc.URL) == 0: - return _copyDoc(c_doc, 1) - return NULL - cdef xmlDoc* _xslt_resolve_from_python(char* c_uri, void* c_context, - int parse_options, int* error): + int parse_options, int* error) with gil: # call the Python document loaders cdef _XSLTResolverContext context cdef _ResolverRegistry resolvers @@ -82,6 +74,14 @@ error[0] = 0 context = <_XSLTResolverContext>c_context + + # shortcut if we resolve the stylesheet itself + c_doc = context._c_style_doc + if c_doc is not NULL and c_doc.URL is not NULL: + if cstd.strcmp(c_uri, c_doc.URL) == 0: + return _copyDoc(c_doc, 1) + + # delegate to the Python resolvers try: resolvers = context._resolvers if cstd.strncmp('string://', c_uri, 9) == 0: @@ -115,7 +115,7 @@ return NULL cdef void _xslt_store_resolver_exception(char* c_uri, void* context, - xslt.xsltLoadType c_type): + xslt.xsltLoadType c_type) with gil: message = "Cannot resolve URI %s" % c_uri if c_type == xslt.XSLT_LOAD_DOCUMENT: exception = XSLTApplyError(message) @@ -125,14 +125,13 @@ cdef xmlDoc* _xslt_doc_loader(char* c_uri, tree.xmlDict* c_dict, int parse_options, void* c_ctxt, - xslt.xsltLoadType c_type): + xslt.xsltLoadType c_type) nogil: # no Python objects here, may be called without thread context ! # when we declare a Python object, Pyrex will INCREF(None) ! 
cdef xmlDoc* c_doc cdef xmlDoc* result cdef void* c_pcontext cdef int error - cdef python.PyGILState_STATE gil_state # find resolver contexts of stylesheet and transformed doc if c_type == xslt.XSLT_LOAD_DOCUMENT: # transformation time @@ -148,14 +147,6 @@ return XSLT_DOC_DEFAULT_LOADER( c_uri, c_dict, parse_options, c_ctxt, c_type) - gil_state = python.PyGILState_Ensure() - c_doc = _xslt_resolve_stylesheet(c_uri, c_pcontext) - if c_doc is not NULL: - python.PyGILState_Release(gil_state) - if c_type == xslt.XSLT_LOAD_STYLESHEET: - c_doc._private = c_pcontext - return c_doc - c_doc = _xslt_resolve_from_python(c_uri, c_pcontext, parse_options, &error) if c_doc is NULL and not error: c_doc = XSLT_DOC_DEFAULT_LOADER( @@ -163,7 +154,6 @@ if c_doc is NULL: _xslt_store_resolver_exception(c_uri, c_pcontext, c_type) - python.PyGILState_Release(gil_state) if c_doc is not NULL and c_type == xslt.XSLT_LOAD_STYLESHEET: c_doc._private = c_pcontext return c_doc From scoder at codespeak.net Sun Dec 2 14:45:02 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sun, 2 Dec 2007 14:45:02 +0100 (CET) Subject: [Lxml-checkins] r49277 - lxml/trunk/src/lxml Message-ID: <20071202134502.A4F278169@code0.codespeak.net> Author: scoder Date: Sun Dec 2 14:45:01 2007 New Revision: 49277 Modified: lxml/trunk/src/lxml/c14n.pxd lxml/trunk/src/lxml/cstd.pxd lxml/trunk/src/lxml/dtd.pxi lxml/trunk/src/lxml/dtdvalid.pxd lxml/trunk/src/lxml/etreepublic.pxd lxml/trunk/src/lxml/htmlparser.pxd lxml/trunk/src/lxml/lxml.etree.pyx lxml/trunk/src/lxml/parser.pxi lxml/trunk/src/lxml/relaxng.pxd lxml/trunk/src/lxml/relaxng.pxi lxml/trunk/src/lxml/schematron.pxd lxml/trunk/src/lxml/schematron.pxi lxml/trunk/src/lxml/serializer.pxi lxml/trunk/src/lxml/tree.pxd lxml/trunk/src/lxml/xinclude.pxd lxml/trunk/src/lxml/xmlerror.pxd lxml/trunk/src/lxml/xmlparser.pxd lxml/trunk/src/lxml/xmlschema.pxd lxml/trunk/src/lxml/xmlschema.pxi lxml/trunk/src/lxml/xpath.pxd lxml/trunk/src/lxml/xpath.pxi 
lxml/trunk/src/lxml/xslt.pxd lxml/trunk/src/lxml/xslt.pxi Log: use 'with gil/nogil' where appropriate instead of acquiring/releasing the GIL by hand Modified: lxml/trunk/src/lxml/c14n.pxd ============================================================================== --- lxml/trunk/src/lxml/c14n.pxd (original) +++ lxml/trunk/src/lxml/c14n.pxd Sun Dec 2 14:45:01 2007 @@ -7,7 +7,7 @@ int exclusive, char** inclusive_ns_prefixes, int with_comments, - char** doc_txt_ptr) + char** doc_txt_ptr) nogil cdef int xmlC14NDocSave(xmlDoc* doc, xmlNodeSet* nodes, @@ -15,12 +15,12 @@ char** inclusive_ns_prefixes, int with_comments, char* filename, - int compression) + int compression) nogil cdef int xmlC14NDocSaveTo(xmlDoc* doc, xmlNodeSet* nodes, int exclusive, char** inclusive_ns_prefixes, int with_comments, - xmlOutputBuffer* buffer) - + xmlOutputBuffer* buffer) nogil + Modified: lxml/trunk/src/lxml/cstd.pxd ============================================================================== --- lxml/trunk/src/lxml/cstd.pxd (original) +++ lxml/trunk/src/lxml/cstd.pxd Sun Dec 2 14:45:01 2007 @@ -1,29 +1,29 @@ cdef extern from "stdio.h": ctypedef struct FILE - cdef int sprintf(char* str, char* format, ...) - cdef int printf(char* str) + cdef int sprintf(char* str, char* format, ...) 
nogil + cdef int printf(char* str) nogil cdef extern from "string.h": ctypedef int size_t - cdef int strlen(char* s) - cdef char* strstr(char* haystack, char* needle) - cdef char* strchr(char* haystack, int needle) - cdef char* strrchr(char* haystack, int needle) - cdef int strcmp(char* s1, char* s2) - cdef int strncmp(char* s1, char* s2, size_t len) - cdef void* memcpy(void* dest, void* src, size_t len) - cdef void* memset(void* s, int c, size_t len) + cdef int strlen(char* s) nogil + cdef char* strstr(char* haystack, char* needle) nogil + cdef char* strchr(char* haystack, int needle) nogil + cdef char* strrchr(char* haystack, int needle) nogil + cdef int strcmp(char* s1, char* s2) nogil + cdef int strncmp(char* s1, char* s2, size_t len) nogil + cdef void* memcpy(void* dest, void* src, size_t len) nogil + cdef void* memset(void* s, int c, size_t len) nogil cdef extern from "stdlib.h": - cdef void* malloc(size_t size) - cdef void free(void* ptr) + cdef void* malloc(size_t size) nogil + cdef void free(void* ptr) nogil cdef extern from "stdarg.h": ctypedef void *va_list - void va_start(va_list ap, void *last) - void va_end(va_list ap) + void va_start(va_list ap, void *last) nogil + void va_end(va_list ap) nogil cdef extern from "etree_defs.h": - cdef int va_int(va_list ap) - cdef char *va_charptr(va_list ap) + cdef int va_int(va_list ap) nogil + cdef char *va_charptr(va_list ap) nogil Modified: lxml/trunk/src/lxml/dtd.pxi ============================================================================== --- lxml/trunk/src/lxml/dtd.pxi (original) +++ lxml/trunk/src/lxml/dtd.pxi Sun Dec 2 14:45:01 2007 @@ -53,7 +53,6 @@ Returns true if the document is valid, false if not. 
""" - cdef python.PyThreadState* state cdef _Document doc cdef _Element root_node cdef xmlDoc* c_doc @@ -70,9 +69,8 @@ raise DTDError, "Failed to create validation context" c_doc = _fakeRootDoc(doc._c_doc, root_node._c_node) - state = python.PyEval_SaveThread() - ret = dtdvalid.xmlValidateDtd(valid_ctxt, c_doc, self._c_dtd) - python.PyEval_RestoreThread(state) + with nogil: + ret = dtdvalid.xmlValidateDtd(valid_ctxt, c_doc, self._c_dtd) _destroyFakeDoc(doc._c_doc, c_doc) dtdvalid.xmlFreeValidCtxt(valid_ctxt) Modified: lxml/trunk/src/lxml/dtdvalid.pxd ============================================================================== --- lxml/trunk/src/lxml/dtdvalid.pxd (original) +++ lxml/trunk/src/lxml/dtdvalid.pxd Sun Dec 2 14:45:01 2007 @@ -4,7 +4,7 @@ cdef extern from "libxml/valid.h": ctypedef struct xmlValidCtxt - cdef xmlValidCtxt* xmlNewValidCtxt() - cdef void xmlFreeValidCtxt(xmlValidCtxt* cur) + cdef xmlValidCtxt* xmlNewValidCtxt() nogil + cdef void xmlFreeValidCtxt(xmlValidCtxt* cur) nogil - cdef int xmlValidateDtd(xmlValidCtxt* ctxt, xmlDoc* doc, xmlDtd* dtd) + cdef int xmlValidateDtd(xmlValidCtxt* ctxt, xmlDoc* doc, xmlDtd* dtd) nogil Modified: lxml/trunk/src/lxml/etreepublic.pxd ============================================================================== --- lxml/trunk/src/lxml/etreepublic.pxd (original) +++ lxml/trunk/src/lxml/etreepublic.pxd Sun Dec 2 14:45:01 2007 @@ -5,16 +5,16 @@ cdef extern from "etree_defs.h": # test if c_node is considered an Element (i.e. Element, Comment, etc.) 
- cdef bint _isElement(tree.xmlNode* c_node) + cdef bint _isElement(tree.xmlNode* c_node) nogil # return the namespace URI of the node or NULL - cdef char* _getNs(tree.xmlNode* node) + cdef char* _getNs(tree.xmlNode* node) nogil # pair of macros for tree traversal cdef void BEGIN_FOR_EACH_ELEMENT_FROM(tree.xmlNode* tree_top, tree.xmlNode* start_node, - int start_node_inclusive) - cdef void END_FOR_EACH_ELEMENT_FROM(tree.xmlNode* start_node) + int start_node_inclusive) nogil + cdef void END_FOR_EACH_ELEMENT_FROM(tree.xmlNode* start_node) nogil cdef extern from "lxml.etree_api.h": @@ -129,23 +129,23 @@ # XML node helper functions # check if the element has at least one child - cdef bint hasChild(tree.xmlNode* c_node) + cdef bint hasChild(tree.xmlNode* c_node) nogil # find child element number 'index' (supports negative indexes) cdef tree.xmlNode* findChild(tree.xmlNode* c_node, - Py_ssize_t index) + Py_ssize_t index) nogil # find child element number 'index' starting at first one cdef tree.xmlNode* findChildForwards(tree.xmlNode* c_node, - Py_ssize_t index) + Py_ssize_t index) nogil # find child element number 'index' starting at last one cdef tree.xmlNode* findChildBackwards(tree.xmlNode* c_node, - Py_ssize_t index) + Py_ssize_t index) nogil # return next/previous sibling element of the node - cdef tree.xmlNode* nextElement(tree.xmlNode* c_node) - cdef tree.xmlNode* previousElement(tree.xmlNode* c_node) + cdef tree.xmlNode* nextElement(tree.xmlNode* c_node) nogil + cdef tree.xmlNode* previousElement(tree.xmlNode* c_node) nogil ########################################################################## # iterators @@ -191,10 +191,10 @@ cdef object namespacedNameFromNsName(char* c_ns, char* c_tag) # check if the node has a text value (which may be '') - cdef bint hasText(tree.xmlNode* c_node) + cdef bint hasText(tree.xmlNode* c_node) nogil # check if the node has a tail value (which may be '') - cdef bint hasTail(tree.xmlNode* c_node) + cdef bint hasTail(tree.xmlNode* 
c_node) nogil # get the text content of an element (or None) cdef object textOf(tree.xmlNode* c_node) Modified: lxml/trunk/src/lxml/htmlparser.pxd ============================================================================== --- lxml/trunk/src/lxml/htmlparser.pxd (original) +++ lxml/trunk/src/lxml/htmlparser.pxd Sun Dec 2 14:45:01 2007 @@ -14,30 +14,34 @@ HTML_PARSE_RECOVER # Relaxed parsing HTML_PARSE_COMPACT # compact small text nodes - cdef xmlParserCtxt* htmlCreateMemoryParserCtxt(char* buffer, int size) - cdef xmlParserCtxt* htmlCreateFileParserCtxt(char* filename, char* encoding) + cdef xmlParserCtxt* htmlCreateMemoryParserCtxt( + char* buffer, int size) nogil + cdef xmlParserCtxt* htmlCreateFileParserCtxt( + char* filename, char* encoding) nogil cdef xmlParserCtxt* htmlCreatePushParserCtxt(xmlSAXHandler* sax, void* user_data, char* chunk, int size, - char* filename, int enc) - cdef void htmlFreeParserCtxt(xmlParserCtxt* ctxt) - cdef void htmlCtxtReset(xmlParserCtxt* ctxt) - cdef int htmlCtxtUseOptions(xmlParserCtxt* ctxt, int options) - cdef int htmlParseDocument(xmlParserCtxt* ctxt) + char* filename, int enc) nogil + cdef void htmlFreeParserCtxt(xmlParserCtxt* ctxt) nogil + cdef void htmlCtxtReset(xmlParserCtxt* ctxt) nogil + cdef int htmlCtxtUseOptions(xmlParserCtxt* ctxt, int options) nogil + cdef int htmlParseDocument(xmlParserCtxt* ctxt) nogil cdef int htmlParseChunk(xmlParserCtxt* ctxt, - char* chunk, int size, int terminate) + char* chunk, int size, int terminate) nogil cdef xmlDoc* htmlCtxtReadFile(xmlParserCtxt* ctxt, char* filename, char* encoding, - int options) + int options) nogil cdef xmlDoc* htmlCtxtReadDoc(xmlParserCtxt* ctxt, char* buffer, char* URL, char* encoding, - int options) + int options) nogil cdef xmlDoc* htmlCtxtReadIO(xmlParserCtxt* ctxt, xmlInputReadCallback ioread, xmlInputCloseCallback ioclose, void* ioctx, - char* URL, char* encoding, int options) + char* URL, char* encoding, + int options) nogil cdef xmlDoc* 
htmlCtxtReadMemory(xmlParserCtxt* ctxt, char* buffer, int size, - char* filename, char* encoding, int options) + char* filename, char* encoding, + int options) nogil Modified: lxml/trunk/src/lxml/lxml.etree.pyx ============================================================================== --- lxml/trunk/src/lxml/lxml.etree.pyx (original) +++ lxml/trunk/src/lxml/lxml.etree.pyx Sun Dec 2 14:45:01 2007 @@ -1588,8 +1588,8 @@ Note that XInclude does not support custom resolvers in Python space due to restrictions of libxml2 <= 2.6.29. """ - cdef python.PyThreadState* state cdef int result + self._assertHasRoot() # We cannot pass the XML_PARSE_NOXINCNODE option as this would free # the XInclude nodes - there may still be Python references to them! # Therefore, we allow XInclude nodes to be converted to @@ -1597,16 +1597,14 @@ # siblings. Tree traversal will simply ignore them as they are not # typed as elements. The included fragment is added between the two, # i.e. as a sibling, which does not conflict with traversal. 
- self._assertHasRoot() - state = python.PyEval_SaveThread() - if self._context_node._doc._parser is not None: - result = xinclude.xmlXIncludeProcessTreeFlags( - self._context_node._c_node, - self._context_node._doc._parser._parse_options) - else: - result = xinclude.xmlXIncludeProcessTree( - self._context_node._c_node) - python.PyEval_RestoreThread(state) + with nogil: + if self._context_node._doc._parser is not None: + result = xinclude.xmlXIncludeProcessTreeFlags( + self._context_node._c_node, + self._context_node._doc._parser._parse_options) + else: + result = xinclude.xmlXIncludeProcessTree( + self._context_node._c_node) if result == -1: raise XIncludeError, "XInclude processing failed" Modified: lxml/trunk/src/lxml/parser.pxi ============================================================================== --- lxml/trunk/src/lxml/parser.pxi (original) +++ lxml/trunk/src/lxml/parser.pxi Sun Dec 2 14:45:01 2007 @@ -228,7 +228,6 @@ return xmlparser.xmlNewIOInputStream(ctxt, c_buffer, 0) cdef xmlDoc* _readDoc(self, xmlparser.xmlParserCtxt* ctxt, int options): - cdef python.PyThreadState* state cdef xmlDoc* result cdef char* c_encoding @@ -237,29 +236,24 @@ else: c_encoding = _cstr(self._encoding) - state = python.PyEval_SaveThread() - if ctxt.html: - result = htmlparser.htmlCtxtReadIO( - ctxt, _readFilelikeParser, NULL, self, - self._c_url, c_encoding, options) - else: - result = xmlparser.xmlCtxtReadIO( - ctxt, _readFilelikeParser, NULL, self, - self._c_url, c_encoding, options) - python.PyEval_RestoreThread(state) + with nogil: + if ctxt.html: + result = htmlparser.htmlCtxtReadIO( + ctxt, _readFilelikeParser, NULL, self, + self._c_url, c_encoding, options) + else: + result = xmlparser.xmlCtxtReadIO( + ctxt, _readFilelikeParser, NULL, self, + self._c_url, c_encoding, options) return result cdef tree.xmlDtd* _readDtd(self): - cdef python.PyThreadState* state - cdef tree.xmlDtd* result cdef xmlparser.xmlParserInputBuffer* c_buffer c_buffer = 
xmlparser.xmlAllocParserInputBuffer(0) c_buffer.context = self c_buffer.readcallback = _readFilelikeParser - state = python.PyEval_SaveThread() - result = xmlparser.xmlIOParseDTD(NULL, c_buffer, 0) - python.PyEval_RestoreThread(state) - return result + with nogil: + return xmlparser.xmlIOParseDTD(NULL, c_buffer, 0) cdef int copyToBuffer(self, char* c_buffer, int c_size): cdef char* c_start @@ -699,7 +693,6 @@ """Parse unicode document, share dictionary if possible. """ cdef _ParserContext context - cdef python.PyThreadState* state cdef xmlDoc* result cdef xmlparser.xmlParserCtxt* pctxt cdef Py_ssize_t py_buffer_len @@ -719,16 +712,15 @@ __GLOBAL_PARSER_CONTEXT.initParserDict(pctxt) c_text = python.PyUnicode_AS_DATA(utext) - state = python.PyEval_SaveThread() - if self._for_html: - result = htmlparser.htmlCtxtReadMemory( - pctxt, c_text, buffer_len, c_filename, _UNICODE_ENCODING, - self._parse_options) - else: - result = xmlparser.xmlCtxtReadMemory( - pctxt, c_text, buffer_len, c_filename, _UNICODE_ENCODING, - self._parse_options) - python.PyEval_RestoreThread(state) + with nogil: + if self._for_html: + result = htmlparser.htmlCtxtReadMemory( + pctxt, c_text, buffer_len, c_filename, _UNICODE_ENCODING, + self._parse_options) + else: + result = xmlparser.xmlCtxtReadMemory( + pctxt, c_text, buffer_len, c_filename, _UNICODE_ENCODING, + self._parse_options) return context._handleParseResultDoc(self, result, None) finally: @@ -739,7 +731,6 @@ """Parse document, share dictionary if possible. 
""" cdef _ParserContext context - cdef python.PyThreadState* state cdef xmlDoc* result cdef xmlparser.xmlParserCtxt* pctxt cdef char* c_encoding @@ -757,16 +748,15 @@ else: c_encoding = _cstr(self._default_encoding) - state = python.PyEval_SaveThread() - if self._for_html: - result = htmlparser.htmlCtxtReadMemory( - pctxt, c_text, c_len, c_filename, - c_encoding, self._parse_options) - else: - result = xmlparser.xmlCtxtReadMemory( - pctxt, c_text, c_len, c_filename, - c_encoding, self._parse_options) - python.PyEval_RestoreThread(state) + with nogil: + if self._for_html: + result = htmlparser.htmlCtxtReadMemory( + pctxt, c_text, c_len, c_filename, + c_encoding, self._parse_options) + else: + result = xmlparser.xmlCtxtReadMemory( + pctxt, c_text, c_len, c_filename, + c_encoding, self._parse_options) return context._handleParseResultDoc(self, result, None) finally: @@ -774,7 +764,6 @@ cdef xmlDoc* _parseDocFromFile(self, char* c_filename) except NULL: cdef _ParserContext context - cdef python.PyThreadState* state cdef xmlDoc* result cdef xmlparser.xmlParserCtxt* pctxt cdef int orig_options @@ -793,14 +782,13 @@ c_encoding = _cstr(self._default_encoding) orig_options = pctxt.options - state = python.PyEval_SaveThread() - if self._for_html: - result = htmlparser.htmlCtxtReadFile( - pctxt, c_filename, c_encoding, self._parse_options) - else: - result = xmlparser.xmlCtxtReadFile( - pctxt, c_filename, c_encoding, self._parse_options) - python.PyEval_RestoreThread(state) + with nogil: + if self._for_html: + result = htmlparser.htmlCtxtReadFile( + pctxt, c_filename, c_encoding, self._parse_options) + else: + result = xmlparser.xmlCtxtReadFile( + pctxt, c_filename, c_encoding, self._parse_options) pctxt.options = orig_options # work around libxml2 problem return context._handleParseResultDoc(self, result, c_filename) @@ -1630,13 +1618,12 @@ return result cdef xmlDoc* _copyDoc(xmlDoc* c_doc, int recursive) except NULL: - cdef python.PyThreadState* state cdef xmlDoc* result if 
recursive: - state = python.PyEval_SaveThread() - result = tree.xmlCopyDoc(c_doc, recursive) - if recursive: - python.PyEval_RestoreThread(state) + with nogil: + result = tree.xmlCopyDoc(c_doc, recursive) + else: + result = tree.xmlCopyDoc(c_doc, 0) if result is NULL: python.PyErr_NoMemory() __GLOBAL_PARSER_CONTEXT.initDocDict(result) @@ -1644,14 +1631,12 @@ cdef xmlDoc* _copyDocRoot(xmlDoc* c_doc, xmlNode* c_new_root) except NULL: "Recursively copy the document and make c_new_root the new root node." - cdef python.PyThreadState* state cdef xmlDoc* result cdef xmlNode* c_node result = tree.xmlCopyDoc(c_doc, 0) # non recursive __GLOBAL_PARSER_CONTEXT.initDocDict(result) - state = python.PyEval_SaveThread() - c_node = tree.xmlDocCopyNode(c_new_root, result, 1) # recursive - python.PyEval_RestoreThread(state) + with nogil: + c_node = tree.xmlDocCopyNode(c_new_root, result, 1) # recursive if c_node is NULL: python.PyErr_NoMemory() tree.xmlDocSetRootElement(result, c_node) Modified: lxml/trunk/src/lxml/relaxng.pxd ============================================================================== --- lxml/trunk/src/lxml/relaxng.pxd (original) +++ lxml/trunk/src/lxml/relaxng.pxd Sun Dec 2 14:45:01 2007 @@ -49,12 +49,11 @@ XML_RELAXNG_ERR_ELEMWRONG = 38 XML_RELAXNG_ERR_TEXTWRONG = 39 - cdef xmlRelaxNGValidCtxt* xmlRelaxNGNewValidCtxt(xmlRelaxNG* schema) - cdef int xmlRelaxNGValidateDoc(xmlRelaxNGValidCtxt* ctxt, xmlDoc* doc) - cdef xmlRelaxNG* xmlRelaxNGParse(xmlRelaxNGParserCtxt* ctxt) - cdef xmlRelaxNGParserCtxt* xmlRelaxNGNewParserCtxt(char* URL) - cdef xmlRelaxNGParserCtxt* xmlRelaxNGNewDocParserCtxt(xmlDoc* doc) - cdef void xmlRelaxNGFree(xmlRelaxNG* schema) - cdef void xmlRelaxNGFreeParserCtxt(xmlRelaxNGParserCtxt* ctxt) - cdef void xmlRelaxNGFreeValidCtxt(xmlRelaxNGValidCtxt* ctxt) - + cdef xmlRelaxNGValidCtxt* xmlRelaxNGNewValidCtxt(xmlRelaxNG* schema) nogil + cdef int xmlRelaxNGValidateDoc(xmlRelaxNGValidCtxt* ctxt, xmlDoc* doc) nogil + cdef xmlRelaxNG* 
xmlRelaxNGParse(xmlRelaxNGParserCtxt* ctxt) nogil + cdef xmlRelaxNGParserCtxt* xmlRelaxNGNewParserCtxt(char* URL) nogil + cdef xmlRelaxNGParserCtxt* xmlRelaxNGNewDocParserCtxt(xmlDoc* doc) nogil + cdef void xmlRelaxNGFree(xmlRelaxNG* schema) nogil + cdef void xmlRelaxNGFreeParserCtxt(xmlRelaxNGParserCtxt* ctxt) nogil + cdef void xmlRelaxNGFreeValidCtxt(xmlRelaxNGValidCtxt* ctxt) nogil Modified: lxml/trunk/src/lxml/relaxng.pxi ============================================================================== --- lxml/trunk/src/lxml/relaxng.pxi (original) +++ lxml/trunk/src/lxml/relaxng.pxi Sun Dec 2 14:45:01 2007 @@ -86,7 +86,6 @@ """Validate doc using Relax NG. Returns true if document is valid, false if not.""" - cdef python.PyThreadState* state cdef _Document doc cdef _Element root_node cdef xmlDoc* c_doc @@ -103,9 +102,8 @@ python.PyErr_NoMemory() c_doc = _fakeRootDoc(doc._c_doc, root_node._c_node) - state = python.PyEval_SaveThread() - ret = relaxng.xmlRelaxNGValidateDoc(valid_ctxt, c_doc) - python.PyEval_RestoreThread(state) + with nogil: + ret = relaxng.xmlRelaxNGValidateDoc(valid_ctxt, c_doc) _destroyFakeDoc(doc._c_doc, c_doc) relaxng.xmlRelaxNGFreeValidCtxt(valid_ctxt) Modified: lxml/trunk/src/lxml/schematron.pxd ============================================================================== --- lxml/trunk/src/lxml/schematron.pxd (original) +++ lxml/trunk/src/lxml/schematron.pxd Sun Dec 2 14:45:01 2007 @@ -14,15 +14,17 @@ XML_SCHEMATRON_OUT_BUFFER = 512 # output to a buffer XML_SCHEMATRON_OUT_IO = 1024 # output to I/O mechanism - cdef xmlSchematronParserCtxt* xmlSchematronNewDocParserCtxt(xmlDoc* doc) - cdef xmlSchematronParserCtxt* xmlSchematronNewParserCtxt(char* filename) - cdef xmlSchematronValidCtxt* xmlSchematronNewValidCtxt(xmlSchematron* schema, - int options) + cdef xmlSchematronParserCtxt* xmlSchematronNewDocParserCtxt( + xmlDoc* doc) nogil + cdef xmlSchematronParserCtxt* xmlSchematronNewParserCtxt( + char* filename) nogil + cdef 
xmlSchematronValidCtxt* xmlSchematronNewValidCtxt( + xmlSchematron* schema, int options) nogil - cdef xmlSchematron* xmlSchematronParse(xmlSchematronParserCtxt* ctxt) + cdef xmlSchematron* xmlSchematronParse(xmlSchematronParserCtxt* ctxt) nogil cdef int xmlSchematronValidateDoc(xmlSchematronValidCtxt* ctxt, - xmlDoc* instance) + xmlDoc* instance) nogil - cdef void xmlSchematronFreeParserCtxt(xmlSchematronParserCtxt* ctxt) - cdef void xmlSchematronFreeValidCtxt(xmlSchematronValidCtxt* ctxt) - cdef void xmlSchematronFree(xmlSchematron* schema) + cdef void xmlSchematronFreeParserCtxt(xmlSchematronParserCtxt* ctxt) nogil + cdef void xmlSchematronFreeValidCtxt(xmlSchematronValidCtxt* ctxt) nogil + cdef void xmlSchematronFree(xmlSchematron* schema) nogil Modified: lxml/trunk/src/lxml/schematron.pxi ============================================================================== --- lxml/trunk/src/lxml/schematron.pxi (original) +++ lxml/trunk/src/lxml/schematron.pxi Sun Dec 2 14:45:01 2007 @@ -117,7 +117,6 @@ """Validate doc using Schematron. 
Returns true if document is valid, false if not.""" - cdef python.PyThreadState* state cdef _Document doc cdef _Element root_node cdef xmlDoc* c_doc @@ -140,9 +139,8 @@ raise SchematronError, "Failed to create validation context" c_doc = _fakeRootDoc(doc._c_doc, root_node._c_node) - state = python.PyEval_SaveThread() - ret = schematron.xmlSchematronValidateDoc(valid_ctxt, c_doc) - python.PyEval_RestoreThread(state) + with nogil: + ret = schematron.xmlSchematronValidateDoc(valid_ctxt, c_doc) _destroyFakeDoc(doc._c_doc, c_doc) schematron.xmlSchematronFreeValidCtxt(valid_ctxt) Modified: lxml/trunk/src/lxml/serializer.pxi ============================================================================== --- lxml/trunk/src/lxml/serializer.pxi (original) +++ lxml/trunk/src/lxml/serializer.pxi Sun Dec 2 14:45:01 2007 @@ -18,11 +18,9 @@ raise ValueError, "unknown output method %r" % method cdef _textToString(xmlNode* c_node, encoding): - cdef python.PyThreadState* state cdef char* c_text - state = python.PyEval_SaveThread() - c_text = tree.xmlNodeGetContent(c_node) - python.PyEval_RestoreThread(state) + with nogil: + c_text = tree.xmlNodeGetContent(c_node) if c_text is NULL: python.PyErr_NoMemory() @@ -49,7 +47,6 @@ """Serialize an element to an encoded string representation of its XML tree. 
""" - cdef python.PyThreadState* state cdef tree.xmlOutputBuffer* c_buffer cdef tree.xmlBuffer* c_result_buffer cdef tree.xmlCharEncodingHandler* enchandler @@ -77,17 +74,17 @@ tree.xmlCharEncCloseFunc(enchandler) raise LxmlError, "Failed to create output buffer" - try: - state = python.PyEval_SaveThread() + with nogil: _writeNodeToBuffer(c_buffer, element._c_node, c_enc, c_method, write_xml_declaration, write_complete_document, pretty_print) tree.xmlOutputBufferFlush(c_buffer) - python.PyEval_RestoreThread(state) if c_buffer.conv is not NULL: c_result_buffer = c_buffer.conv else: c_result_buffer = c_buffer.buffer + + try: result = python.PyString_FromStringAndSize( tree.xmlBufferContent(c_result_buffer), tree.xmlBufferLength(c_result_buffer)) @@ -100,7 +97,6 @@ """Serialize an element to the Python unicode representation of its XML tree. """ - cdef python.PyThreadState* state cdef tree.xmlOutputBuffer* c_buffer cdef tree.xmlBuffer* c_result_buffer cdef int c_method @@ -113,16 +109,17 @@ c_buffer = tree.xmlAllocOutputBuffer(NULL) if c_buffer is NULL: raise LxmlError, "Failed to create output buffer" - try: - state = python.PyEval_SaveThread() + + with nogil: _writeNodeToBuffer(c_buffer, element._c_node, NULL, c_method, 0, write_complete_document, pretty_print) tree.xmlOutputBufferFlush(c_buffer) - python.PyEval_RestoreThread(state) if c_buffer.conv is not NULL: c_result_buffer = c_buffer.conv else: c_result_buffer = c_buffer.buffer + + try: result = python.PyUnicode_DecodeUTF8( tree.xmlBufferContent(c_result_buffer), tree.xmlBufferLength(c_result_buffer), @@ -135,7 +132,7 @@ xmlNode* c_node, char* encoding, int c_method, bint write_xml_declaration, bint write_complete_document, - bint pretty_print): + bint pretty_print) nogil: cdef xmlDoc* c_doc cdef xmlNode* c_nsdecl_node c_doc = c_node.doc @@ -177,7 +174,7 @@ _writeNextSiblings(c_buffer, c_node, encoding, pretty_print) cdef void _writeDeclarationToBuffer(tree.xmlOutputBuffer* c_buffer, - char* version, char* 
encoding): + char* version, char* encoding) nogil: if version is NULL: version = "1.0" tree.xmlOutputBufferWriteString(c_buffer, "\n") cdef void _writeDtdToBuffer(tree.xmlOutputBuffer* c_buffer, - xmlDoc* c_doc, char* c_root_name, char* encoding): + xmlDoc* c_doc, char* c_root_name, + char* encoding) nogil: cdef tree.xmlDtd* c_dtd cdef xmlNode* c_node c_dtd = c_doc.intSubset @@ -222,7 +220,7 @@ tree.xmlOutputBufferWrite(c_buffer, 3, "]>\n") cdef void _writeTail(tree.xmlOutputBuffer* c_buffer, xmlNode* c_node, - char* encoding, bint pretty_print): + char* encoding, bint pretty_print) nogil: "Write the element tail." c_node = c_node.next while c_node is not NULL and c_node.type == tree.XML_TEXT_NODE: @@ -231,7 +229,7 @@ c_node = c_node.next cdef void _writePrevSiblings(tree.xmlOutputBuffer* c_buffer, xmlNode* c_node, - char* encoding, bint pretty_print): + char* encoding, bint pretty_print) nogil: cdef xmlNode* c_sibling if c_node.parent is not NULL and _isElement(c_node.parent): return @@ -247,7 +245,7 @@ c_sibling = c_sibling.next cdef void _writeNextSiblings(tree.xmlOutputBuffer* c_buffer, xmlNode* c_node, - char* encoding, bint pretty_print): + char* encoding, bint pretty_print) nogil: cdef xmlNode* c_sibling if c_node.parent is not NULL and _isElement(c_node.parent): return @@ -358,7 +356,6 @@ writer._exc_context._raise_if_stored() cdef _tofilelikeC14N(f, _Element element): - cdef python.PyThreadState* state cdef _FilelikeWriter writer cdef tree.xmlOutputBuffer* c_buffer cdef char* c_filename @@ -372,9 +369,9 @@ if _isString(f): filename8 = _encodeFilename(f) c_filename = _cstr(filename8) - state = python.PyEval_SaveThread() - bytes = c14n.xmlC14NDocSave(c_doc, NULL, 0, NULL, 1, c_filename, 0) - python.PyEval_RestoreThread(state) + with nogil: + bytes = c14n.xmlC14NDocSave(c_doc, NULL, 0, NULL, 1, + c_filename, 0) elif hasattr(f, 'write'): writer = _FilelikeWriter(f) c_buffer = writer._createOutputBuffer(NULL) Modified: lxml/trunk/src/lxml/tree.pxd 
============================================================================== --- lxml/trunk/src/lxml/tree.pxd (original) +++ lxml/trunk/src/lxml/tree.pxd Sun Dec 2 14:45:01 2007 @@ -35,21 +35,22 @@ XML_CHAR_ENCODING_ASCII = 22 # pure ASCII ctypedef struct xmlCharEncodingHandler - cdef xmlCharEncodingHandler* xmlFindCharEncodingHandler(char* name) - cdef xmlCharEncodingHandler* xmlGetCharEncodingHandler(xmlCharEncoding enc) - cdef int xmlCharEncCloseFunc(xmlCharEncodingHandler* handler) - cdef xmlCharEncoding xmlDetectCharEncoding(char* text, int len) - cdef char* xmlGetCharEncodingName(xmlCharEncoding enc) - cdef xmlCharEncoding xmlParseCharEncoding(char* name) + cdef xmlCharEncodingHandler* xmlFindCharEncodingHandler(char* name) nogil + cdef xmlCharEncodingHandler* xmlGetCharEncodingHandler( + xmlCharEncoding enc) nogil + cdef int xmlCharEncCloseFunc(xmlCharEncodingHandler* handler) nogil + cdef xmlCharEncoding xmlDetectCharEncoding(char* text, int len) nogil + cdef char* xmlGetCharEncodingName(xmlCharEncoding enc) nogil + cdef xmlCharEncoding xmlParseCharEncoding(char* name) nogil cdef extern from "libxml/chvalid.h": - cdef int xmlIsChar_ch(char c) + cdef int xmlIsChar_ch(char c) nogil cdef extern from "libxml/hash.h": ctypedef struct xmlHashTable - ctypedef void xmlHashScanner(void* payload, void* data, char* name) - void xmlHashScan(xmlHashTable* table, xmlHashScanner f, void* data) - void* xmlHashLookup(xmlHashTable* table, char* name) + ctypedef void xmlHashScanner(void* payload, void* data, char* name) nogil + void xmlHashScan(xmlHashTable* table, xmlHashScanner f, void* data) nogil + void* xmlHashLookup(xmlHashTable* table, char* name) nogil cdef extern from "libxml/tree.h": @@ -167,106 +168,112 @@ xmlBuffer* buffer xmlBuffer* conv - cdef void xmlFreeDoc(xmlDoc* cur) - cdef void xmlFreeDtd(xmlDtd* cur) - cdef void xmlFreeNode(xmlNode* cur) - cdef void xmlFreeNsList(xmlNs* ns) - cdef void xmlFreeNs(xmlNs* ns) - cdef void xmlFree(char* buf) + cdef void 
xmlFreeDoc(xmlDoc* cur) nogil + cdef void xmlFreeDtd(xmlDtd* cur) nogil + cdef void xmlFreeNode(xmlNode* cur) nogil + cdef void xmlFreeNsList(xmlNs* ns) nogil + cdef void xmlFreeNs(xmlNs* ns) nogil + cdef void xmlFree(char* buf) nogil - cdef xmlNode* xmlNewNode(xmlNs* ns, char* name) - cdef xmlNode* xmlNewDocText(xmlDoc* doc, char* content) - cdef xmlNode* xmlNewDocComment(xmlDoc* doc, char* content) - cdef xmlNode* xmlNewDocPI(xmlDoc* doc, char* name, char* content) - cdef xmlNode* xmlNewReference(xmlDoc* doc, char* name) - cdef xmlNs* xmlNewNs(xmlNode* node, char* href, char* prefix) - cdef xmlNode* xmlAddChild(xmlNode* parent, xmlNode* cur) - cdef xmlNode* xmlReplaceNode(xmlNode* old, xmlNode* cur) - cdef xmlNode* xmlAddPrevSibling(xmlNode* cur, xmlNode* elem) - cdef xmlNode* xmlAddNextSibling(xmlNode* cur, xmlNode* elem) + cdef xmlNode* xmlNewNode(xmlNs* ns, char* name) nogil + cdef xmlNode* xmlNewDocText(xmlDoc* doc, char* content) nogil + cdef xmlNode* xmlNewDocComment(xmlDoc* doc, char* content) nogil + cdef xmlNode* xmlNewDocPI(xmlDoc* doc, char* name, char* content) nogil + cdef xmlNode* xmlNewReference(xmlDoc* doc, char* name) nogil + cdef xmlNs* xmlNewNs(xmlNode* node, char* href, char* prefix) nogil + cdef xmlNode* xmlAddChild(xmlNode* parent, xmlNode* cur) nogil + cdef xmlNode* xmlReplaceNode(xmlNode* old, xmlNode* cur) nogil + cdef xmlNode* xmlAddPrevSibling(xmlNode* cur, xmlNode* elem) nogil + cdef xmlNode* xmlAddNextSibling(xmlNode* cur, xmlNode* elem) nogil cdef xmlNode* xmlNewDocNode(xmlDoc* doc, xmlNs* ns, - char* name, char* content) - cdef xmlDoc* xmlNewDoc(char* version) - cdef xmlAttr* xmlNewProp(xmlNode* node, char* name, char* value) + char* name, char* content) nogil + cdef xmlDoc* xmlNewDoc(char* version) nogil + cdef xmlAttr* xmlNewProp(xmlNode* node, char* name, char* value) nogil cdef xmlAttr* xmlNewNsProp(xmlNode* node, xmlNs* ns, - char* name, char* value) - cdef char* xmlGetNoNsProp(xmlNode* node, char* name) - cdef char* 
xmlGetNsProp(xmlNode* node, char* name, char* nameSpace) - cdef void xmlSetNs(xmlNode* node, xmlNs* ns) - cdef xmlAttr* xmlSetProp(xmlNode* node, char* name, char* value) + char* name, char* value) nogil + cdef char* xmlGetNoNsProp(xmlNode* node, char* name) nogil + cdef char* xmlGetNsProp(xmlNode* node, char* name, char* nameSpace) nogil + cdef void xmlSetNs(xmlNode* node, xmlNs* ns) nogil + cdef xmlAttr* xmlSetProp(xmlNode* node, char* name, char* value) nogil cdef xmlAttr* xmlSetNsProp(xmlNode* node, xmlNs* ns, - char* name, char* value) - cdef int xmlRemoveProp(xmlAttr* cur) - cdef char* xmlGetNodePath(xmlNode* node) - cdef void xmlDocDumpMemory(xmlDoc* cur, char** mem, int* size) + char* name, char* value) nogil + cdef int xmlRemoveProp(xmlAttr* cur) nogil + cdef char* xmlGetNodePath(xmlNode* node) nogil + cdef void xmlDocDumpMemory(xmlDoc* cur, char** mem, int* size) nogil cdef void xmlDocDumpMemoryEnc(xmlDoc* cur, char** mem, int* size, - char* encoding) - cdef int xmlSaveFileTo(xmlOutputBuffer* out, xmlDoc* cur, char* encoding) - - cdef void xmlUnlinkNode(xmlNode* cur) - cdef xmlNode* xmlDocSetRootElement(xmlDoc* doc, xmlNode* root) - cdef xmlNode* xmlDocGetRootElement(xmlDoc* doc) - cdef void xmlSetTreeDoc(xmlNode* tree, xmlDoc* doc) - cdef xmlAttr* xmlHasProp(xmlNode* node, char* name) - cdef xmlAttr* xmlHasNsProp(xmlNode* node, char* name, char* nameSpace) - cdef char* xmlNodeGetContent(xmlNode* cur) - cdef xmlNs* xmlSearchNs(xmlDoc* doc, xmlNode* node, char* prefix) - cdef xmlNs* xmlSearchNsByHref(xmlDoc* doc, xmlNode* node, char* href) - cdef int xmlIsBlankNode(xmlNode* node) - cdef long xmlGetLineNo(xmlNode* node) - cdef void xmlElemDump(FILE* f, xmlDoc* doc, xmlNode* cur) + char* encoding) nogil + cdef int xmlSaveFileTo(xmlOutputBuffer* out, xmlDoc* cur, + char* encoding) nogil + + cdef void xmlUnlinkNode(xmlNode* cur) nogil + cdef xmlNode* xmlDocSetRootElement(xmlDoc* doc, xmlNode* root) nogil + cdef xmlNode* xmlDocGetRootElement(xmlDoc* doc) nogil 
+ cdef void xmlSetTreeDoc(xmlNode* tree, xmlDoc* doc) nogil + cdef xmlAttr* xmlHasProp(xmlNode* node, char* name) nogil + cdef xmlAttr* xmlHasNsProp(xmlNode* node, char* name, char* nameSpace) nogil + cdef char* xmlNodeGetContent(xmlNode* cur) nogil + cdef xmlNs* xmlSearchNs(xmlDoc* doc, xmlNode* node, char* prefix) nogil + cdef xmlNs* xmlSearchNsByHref(xmlDoc* doc, xmlNode* node, char* href) nogil + cdef int xmlIsBlankNode(xmlNode* node) nogil + cdef long xmlGetLineNo(xmlNode* node) nogil + cdef void xmlElemDump(FILE* f, xmlDoc* doc, xmlNode* cur) nogil cdef void xmlNodeDumpOutput(xmlOutputBuffer* buf, xmlDoc* doc, xmlNode* cur, int level, - int format, char* encoding) - cdef void xmlNodeSetName(xmlNode* cur, char* name) - cdef void xmlNodeSetContent(xmlNode* cur, char* content) - cdef xmlDtd* xmlCopyDtd(xmlDtd* dtd) - cdef xmlDoc* xmlCopyDoc(xmlDoc* doc, int recursive) - cdef xmlNode* xmlCopyNode(xmlNode* node, int extended) - cdef xmlNode* xmlDocCopyNode(xmlNode* node, xmlDoc* doc, int extended) - cdef int xmlReconciliateNs(xmlDoc* doc, xmlNode* tree) - cdef xmlNs* xmlNewReconciliedNs(xmlDoc* doc, xmlNode* tree, xmlNs* ns) - cdef xmlBuffer* xmlBufferCreate() - cdef void xmlBufferFree(xmlBuffer* buf) - cdef char* xmlBufferContent(xmlBuffer* buf) - cdef int xmlBufferLength(xmlBuffer* buf) - cdef int xmlKeepBlanksDefault(int val) - cdef char* xmlNodeGetBase(xmlDoc* doc, xmlNode* node) - cdef char* xmlBuildURI(char* href, char* base) - cdef int xmlValidateNCName(char* value, int space) + int format, char* encoding) nogil + cdef void xmlNodeSetName(xmlNode* cur, char* name) nogil + cdef void xmlNodeSetContent(xmlNode* cur, char* content) nogil + cdef xmlDtd* xmlCopyDtd(xmlDtd* dtd) nogil + cdef xmlDoc* xmlCopyDoc(xmlDoc* doc, int recursive) nogil + cdef xmlNode* xmlCopyNode(xmlNode* node, int extended) nogil + cdef xmlNode* xmlDocCopyNode(xmlNode* node, xmlDoc* doc, int extended) nogil + cdef int xmlReconciliateNs(xmlDoc* doc, xmlNode* tree) nogil + cdef xmlNs* 
xmlNewReconciliedNs(xmlDoc* doc, xmlNode* tree, xmlNs* ns) nogil + cdef xmlBuffer* xmlBufferCreate() nogil + cdef void xmlBufferFree(xmlBuffer* buf) nogil + cdef char* xmlBufferContent(xmlBuffer* buf) nogil + cdef int xmlBufferLength(xmlBuffer* buf) nogil + cdef int xmlKeepBlanksDefault(int val) nogil + cdef char* xmlNodeGetBase(xmlDoc* doc, xmlNode* node) nogil + cdef char* xmlBuildURI(char* href, char* base) nogil + cdef int xmlValidateNCName(char* value, int space) nogil cdef extern from "libxml/HTMLtree.h": cdef void htmlNodeDumpFormatOutput(xmlOutputBuffer* buf, xmlDoc* doc, xmlNode* cur, - char* encoding, int format) + char* encoding, int format) nogil cdef extern from "libxml/valid.h": - cdef xmlAttr* xmlGetID(xmlDoc* doc, char* ID) - cdef void xmlDumpNotationTable(xmlBuffer* buffer, xmlNotationTable* table) + cdef xmlAttr* xmlGetID(xmlDoc* doc, char* ID) nogil + cdef void xmlDumpNotationTable(xmlBuffer* buffer, + xmlNotationTable* table) nogil cdef extern from "libxml/xmlIO.h": - cdef void xmlBufferWriteQuotedString(xmlOutputBuffer* out, char* str) - cdef int xmlOutputBufferWriteString(xmlOutputBuffer* out, char* str) - cdef int xmlOutputBufferWrite(xmlOutputBuffer* out, int len, char* str) - cdef int xmlOutputBufferFlush(xmlOutputBuffer* out) - cdef int xmlOutputBufferClose(xmlOutputBuffer* out) - - ctypedef int (*xmlInputReadCallback)(void* context, char* buffer, int len) - ctypedef int (*xmlInputCloseCallback)(void* context) - - ctypedef int (*xmlOutputWriteCallback)(void* context, char* buffer, int len) - ctypedef int (*xmlOutputCloseCallback)(void* context) + cdef void xmlBufferWriteQuotedString(xmlOutputBuffer* out, char* str) nogil + cdef int xmlOutputBufferWriteString(xmlOutputBuffer* out, char* str) nogil + cdef int xmlOutputBufferWrite(xmlOutputBuffer* out, + int len, char* str) nogil + cdef int xmlOutputBufferFlush(xmlOutputBuffer* out) nogil + cdef int xmlOutputBufferClose(xmlOutputBuffer* out) nogil + + ctypedef int 
(*xmlInputReadCallback)(void* context, + char* buffer, int len) nogil + ctypedef int (*xmlInputCloseCallback)(void* context) nogil + + ctypedef int (*xmlOutputWriteCallback)(void* context, + char* buffer, int len) nogil + ctypedef int (*xmlOutputCloseCallback)(void* context) nogil - cdef xmlOutputBuffer* xmlAllocOutputBuffer(xmlCharEncodingHandler* encoder) + cdef xmlOutputBuffer* xmlAllocOutputBuffer( + xmlCharEncodingHandler* encoder) nogil cdef xmlOutputBuffer* xmlOutputBufferCreateIO( xmlOutputWriteCallback iowrite, xmlOutputCloseCallback ioclose, void * ioctx, - xmlCharEncodingHandler* encoder) + xmlCharEncodingHandler* encoder) nogil cdef xmlOutputBuffer* xmlOutputBufferCreateFile( - FILE* file, xmlCharEncodingHandler* encoder) + FILE* file, xmlCharEncodingHandler* encoder) nogil cdef xmlOutputBuffer* xmlOutputBufferCreateFilename( - char* URI, xmlCharEncodingHandler* encoder, int compression) + char* URI, xmlCharEncodingHandler* encoder, int compression) nogil cdef extern from "libxml/xmlsave.h": ctypedef struct xmlSaveCtxt @@ -278,31 +285,32 @@ XML_SAVE_NO_XHTML = 8 # disable XHTML1 specific rules (2.6.22) cdef xmlSaveCtxt* xmlSaveToFilename(char* filename, char* encoding, - int options) + int options) nogil cdef xmlSaveCtxt* xmlSaveToBuffer(xmlBuffer* buffer, char* encoding, - int options) # libxml2 2.6.23 - cdef long xmlSaveDoc(xmlSaveCtxt* ctxt, xmlDoc* doc) - cdef long xmlSaveTree(xmlSaveCtxt* ctxt, xmlNode* node) - cdef int xmlSaveClose(xmlSaveCtxt* ctxt) - cdef int xmlSaveFlush(xmlSaveCtxt* ctxt) - cdef int xmlSaveSetAttrEscape(xmlSaveCtxt* ctxt, void* escape_func) - cdef int xmlSaveSetEscape(xmlSaveCtxt* ctxt, void* escape_func) + int options) nogil # libxml2 2.6.23 + cdef long xmlSaveDoc(xmlSaveCtxt* ctxt, xmlDoc* doc) nogil + cdef long xmlSaveTree(xmlSaveCtxt* ctxt, xmlNode* node) nogil + cdef int xmlSaveClose(xmlSaveCtxt* ctxt) nogil + cdef int xmlSaveFlush(xmlSaveCtxt* ctxt) nogil + cdef int xmlSaveSetAttrEscape(xmlSaveCtxt* ctxt, void* 
escape_func) nogil + cdef int xmlSaveSetEscape(xmlSaveCtxt* ctxt, void* escape_func) nogil cdef extern from "libxml/globals.h": - cdef int xmlThrDefKeepBlanksDefaultValue(int onoff) - cdef int xmlThrDefLineNumbersDefaultValue(int onoff) - cdef int xmlThrDefIndentTreeOutput(int onoff) + cdef int xmlThrDefKeepBlanksDefaultValue(int onoff) nogil + cdef int xmlThrDefLineNumbersDefaultValue(int onoff) nogil + cdef int xmlThrDefIndentTreeOutput(int onoff) nogil cdef extern from "libxml/xmlstring.h": - cdef char* xmlStrdup(char* cur) + cdef char* xmlStrdup(char* cur) nogil cdef extern from "libxml/xmlmemory.h": - cdef void* xmlMalloc(size_t size) + cdef void* xmlMalloc(size_t size) nogil cdef extern from "etree_defs.h": - cdef bint _isElement(xmlNode* node) - cdef bint _isElementOrXInclude(xmlNode* node) - cdef char* _getNs(xmlNode* node) + cdef bint _isElement(xmlNode* node) nogil + cdef bint _isElementOrXInclude(xmlNode* node) nogil + cdef char* _getNs(xmlNode* node) nogil cdef void BEGIN_FOR_EACH_ELEMENT_FROM(xmlNode* tree_top, - xmlNode* start_node, bint inclusive) - cdef void END_FOR_EACH_ELEMENT_FROM(xmlNode* start_node) + xmlNode* start_node, + bint inclusive) nogil + cdef void END_FOR_EACH_ELEMENT_FROM(xmlNode* start_node) nogil Modified: lxml/trunk/src/lxml/xinclude.pxd ============================================================================== --- lxml/trunk/src/lxml/xinclude.pxd (original) +++ lxml/trunk/src/lxml/xinclude.pxd Sun Dec 2 14:45:01 2007 @@ -4,14 +4,15 @@ ctypedef struct xmlXIncludeCtxt - cdef int xmlXIncludeProcess(xmlDoc* doc) - cdef int xmlXIncludeProcessFlags(xmlDoc* doc, int parser_opts) - cdef int xmlXIncludeProcessTree(xmlNode* doc) - cdef int xmlXIncludeProcessTreeFlags(xmlNode* doc, int parser_opts) + cdef int xmlXIncludeProcess(xmlDoc* doc) nogil + cdef int xmlXIncludeProcessFlags(xmlDoc* doc, int parser_opts) nogil + cdef int xmlXIncludeProcessTree(xmlNode* doc) nogil + cdef int xmlXIncludeProcessTreeFlags(xmlNode* doc, int 
parser_opts) nogil - cdef xmlXIncludeCtxt* xmlXIncludeNewContext(xmlDoc* doc) - cdef int xmlXIncludeProcessNode(xmlXIncludeCtxt* ctxt, xmlNode* node) - cdef int xmlXIncludeSetFlags(xmlXIncludeCtxt* ctxt, int flags) + cdef xmlXIncludeCtxt* xmlXIncludeNewContext(xmlDoc* doc) nogil + cdef int xmlXIncludeProcessNode(xmlXIncludeCtxt* ctxt, xmlNode* node) nogil + cdef int xmlXIncludeSetFlags(xmlXIncludeCtxt* ctxt, int flags) nogil # libxml2 >= 2.6.27 - cdef int xmlXIncludeProcessFlagsData(xmlDoc* doc, int flags, void* data) + cdef int xmlXIncludeProcessFlagsData( + xmlDoc* doc, int flags, void* data) nogil Modified: lxml/trunk/src/lxml/xmlerror.pxd ============================================================================== --- lxml/trunk/src/lxml/xmlerror.pxd (original) +++ lxml/trunk/src/lxml/xmlerror.pxd Sun Dec 2 14:45:01 2007 @@ -783,14 +783,17 @@ int int1 int int2 - ctypedef void (*xmlGenericErrorFunc)(void* ctxt, char* msg, ...) - ctypedef void (*xmlStructuredErrorFunc)(void* userData, xmlError* error) + ctypedef void (*xmlGenericErrorFunc)(void* ctxt, char* msg, ...) 
nogil + ctypedef void (*xmlStructuredErrorFunc)(void* userData, + xmlError* error) nogil - cdef void xmlSetGenericErrorFunc(void* ctxt, xmlGenericErrorFunc func) - cdef void xmlSetStructuredErrorFunc(void* ctxt, xmlStructuredErrorFunc func) + cdef void xmlSetGenericErrorFunc( + void* ctxt, xmlGenericErrorFunc func) nogil + cdef void xmlSetStructuredErrorFunc( + void* ctxt, xmlStructuredErrorFunc func) nogil cdef extern from "libxml/globals.h": - cdef void xmlThrDefSetGenericErrorFunc(void* ctx, - xmlGenericErrorFunc handler) - cdef void xmlThrDefSetStructuredErrorFunc(void* ctx, - xmlStructuredErrorFunc handler) + cdef void xmlThrDefSetGenericErrorFunc( + void* ctx, xmlGenericErrorFunc handler) nogil + cdef void xmlThrDefSetStructuredErrorFunc( + void* ctx, xmlStructuredErrorFunc handler) nogil Modified: lxml/trunk/src/lxml/xmlparser.pxd ============================================================================== --- lxml/trunk/src/lxml/xmlparser.pxd (original) +++ lxml/trunk/src/lxml/xmlparser.pxd Sun Dec 2 14:45:01 2007 @@ -65,14 +65,14 @@ int initialized cdef extern from "libxml/xmlIO.h": - cdef xmlParserInputBuffer* xmlAllocParserInputBuffer(int enc) + cdef xmlParserInputBuffer* xmlAllocParserInputBuffer(int enc) nogil cdef extern from "libxml/parser.h": - cdef xmlDict* xmlDictCreate() - cdef xmlDict* xmlDictCreateSub(xmlDict* subdict) - cdef void xmlDictFree(xmlDict* sub) - cdef int xmlDictReference(xmlDict* dict) + cdef xmlDict* xmlDictCreate() nogil + cdef xmlDict* xmlDictCreateSub(xmlDict* subdict) nogil + cdef void xmlDictFree(xmlDict* sub) nogil + cdef int xmlDictReference(xmlDict* dict) nogil cdef int XML_COMPLETE_ATTRS # SAX option for adding DTD default attributes @@ -118,31 +118,34 @@ # libxml2 2.6.21+ only: XML_PARSE_COMPACT = 65536 # compact small text nodes - cdef void xmlInitParser() - cdef int xmlLineNumbersDefault(int onoff) - cdef xmlParserCtxt* xmlNewParserCtxt() + cdef void xmlInitParser() nogil + cdef int xmlLineNumbersDefault(int onoff) 
nogil + cdef xmlParserCtxt* xmlNewParserCtxt() nogil cdef xmlParserInput* xmlNewIOInputStream(xmlParserCtxt* ctxt, xmlParserInputBuffer* input, - int enc) - cdef int xmlCtxtUseOptions(xmlParserCtxt* ctxt, int options) - cdef void xmlFreeParserCtxt(xmlParserCtxt* ctxt) - cdef void xmlCtxtReset(xmlParserCtxt* ctxt) - cdef void xmlClearParserCtxt(xmlParserCtxt* ctxt) + int enc) nogil + cdef int xmlCtxtUseOptions(xmlParserCtxt* ctxt, int options) nogil + cdef void xmlFreeParserCtxt(xmlParserCtxt* ctxt) nogil + cdef void xmlCtxtReset(xmlParserCtxt* ctxt) nogil + cdef void xmlClearParserCtxt(xmlParserCtxt* ctxt) nogil cdef int xmlParseChunk(xmlParserCtxt* ctxt, - char* chunk, int size, int terminate) + char* chunk, int size, int terminate) nogil cdef xmlDoc* xmlCtxtReadDoc(xmlParserCtxt* ctxt, char* cur, char* URL, char* encoding, - int options) + int options) nogil cdef xmlDoc* xmlCtxtReadFile(xmlParserCtxt* ctxt, - char* filename, char* encoding, int options) + char* filename, char* encoding, + int options) nogil cdef xmlDoc* xmlCtxtReadIO(xmlParserCtxt* ctxt, xmlInputReadCallback ioread, xmlInputCloseCallback ioclose, void* ioctx, - char* URL, char* encoding, int options) + char* URL, char* encoding, + int options) nogil cdef xmlDoc* xmlCtxtReadMemory(xmlParserCtxt* ctxt, char* buffer, int size, - char* filename, char* encoding, int options) + char* filename, char* encoding, + int options) nogil # iterparse: @@ -150,33 +153,32 @@ void* user_data, char* chunk, int size, - char* filename) + char* filename) nogil cdef int xmlCtxtResetPush(xmlParserCtxt* ctxt, char* chunk, int size, char* filename, - char* encoding) + char* encoding) nogil # entity loaders: - ctypedef xmlParserInput* (*xmlExternalEntityLoader)(char * URL, - char * ID, - xmlParserCtxt* context) - cdef xmlExternalEntityLoader xmlGetExternalEntityLoader() - cdef void xmlSetExternalEntityLoader(xmlExternalEntityLoader f) + ctypedef xmlParserInput* (*xmlExternalEntityLoader)( + char * URL, char * ID, 
xmlParserCtxt* context) nogil + cdef xmlExternalEntityLoader xmlGetExternalEntityLoader() nogil + cdef void xmlSetExternalEntityLoader(xmlExternalEntityLoader f) nogil # DTDs: - cdef xmlDtd* xmlParseDTD(char* ExternalID, char* SystemID) + cdef xmlDtd* xmlParseDTD(char* ExternalID, char* SystemID) nogil cdef xmlDtd* xmlIOParseDTD(xmlSAXHandler* sax, xmlParserInputBuffer* input, - int enc) + int enc) nogil cdef extern from "libxml/parserInternals.h": cdef xmlParserInput* xmlNewStringInputStream(xmlParserCtxt* ctxt, - char* buffer) + char* buffer) nogil cdef xmlParserInput* xmlNewInputFromFile(xmlParserCtxt* ctxt, - char* filename) - cdef void xmlFreeInputStream(xmlParserInput* input) - cdef int xmlSwitchEncoding(xmlParserCtxt* ctxt, int enc) + char* filename) nogil + cdef void xmlFreeInputStream(xmlParserInput* input) nogil + cdef int xmlSwitchEncoding(xmlParserCtxt* ctxt, int enc) nogil Modified: lxml/trunk/src/lxml/xmlschema.pxd ============================================================================== --- lxml/trunk/src/lxml/xmlschema.pxd (original) +++ lxml/trunk/src/lxml/xmlschema.pxd Sun Dec 2 14:45:01 2007 @@ -4,14 +4,14 @@ cdef extern from "libxml/xmlschemas.h": ctypedef struct xmlSchema ctypedef struct xmlSchemaParserCtxt - + ctypedef struct xmlSchemaValidCtxt - - cdef xmlSchemaValidCtxt* xmlSchemaNewValidCtxt(xmlSchema* schema) - cdef int xmlSchemaValidateDoc(xmlSchemaValidCtxt* ctxt, xmlDoc* doc) - cdef xmlSchema* xmlSchemaParse(xmlSchemaParserCtxt* ctxt) - cdef xmlSchemaParserCtxt* xmlSchemaNewParserCtxt(char* URL) - cdef xmlSchemaParserCtxt* xmlSchemaNewDocParserCtxt(xmlDoc* doc) - cdef void xmlSchemaFree(xmlSchema* schema) - cdef void xmlSchemaFreeParserCtxt(xmlSchemaParserCtxt* ctxt) - cdef void xmlSchemaFreeValidCtxt(xmlSchemaValidCtxt* ctxt) + + cdef xmlSchemaValidCtxt* xmlSchemaNewValidCtxt(xmlSchema* schema) nogil + cdef int xmlSchemaValidateDoc(xmlSchemaValidCtxt* ctxt, xmlDoc* doc) nogil + cdef xmlSchema* xmlSchemaParse(xmlSchemaParserCtxt* 
ctxt) nogil + cdef xmlSchemaParserCtxt* xmlSchemaNewParserCtxt(char* URL) nogil + cdef xmlSchemaParserCtxt* xmlSchemaNewDocParserCtxt(xmlDoc* doc) nogil + cdef void xmlSchemaFree(xmlSchema* schema) nogil + cdef void xmlSchemaFreeParserCtxt(xmlSchemaParserCtxt* ctxt) nogil + cdef void xmlSchemaFreeValidCtxt(xmlSchemaValidCtxt* ctxt) nogil Modified: lxml/trunk/src/lxml/xmlschema.pxi ============================================================================== --- lxml/trunk/src/lxml/xmlschema.pxi (original) +++ lxml/trunk/src/lxml/xmlschema.pxi Sun Dec 2 14:45:01 2007 @@ -81,7 +81,6 @@ Returns true if document is valid, false if not. """ - cdef python.PyThreadState* state cdef xmlschema.xmlSchemaValidCtxt* valid_ctxt cdef _Document doc cdef _Element root_node @@ -98,9 +97,8 @@ raise XMLSchemaError, "Failed to create validation context" c_doc = _fakeRootDoc(doc._c_doc, root_node._c_node) - state = python.PyEval_SaveThread() - ret = xmlschema.xmlSchemaValidateDoc(valid_ctxt, c_doc) - python.PyEval_RestoreThread(state) + with nogil: + ret = xmlschema.xmlSchemaValidateDoc(valid_ctxt, c_doc) _destroyFakeDoc(doc._c_doc, c_doc) xmlschema.xmlSchemaFreeValidCtxt(valid_ctxt) Modified: lxml/trunk/src/lxml/xpath.pxd ============================================================================== --- lxml/trunk/src/lxml/xpath.pxd (original) +++ lxml/trunk/src/lxml/xpath.pxd Sun Dec 2 14:45:01 2007 @@ -68,66 +68,65 @@ tree.xmlNode* ancestor int error - ctypedef struct xmlXPathCompExpr: - pass + ctypedef struct xmlXPathCompExpr ctypedef void (*xmlXPathFunction)(xmlXPathParserContext* ctxt, int nargs) ctypedef xmlXPathFunction (*xmlXPathFuncLookupFunc)(void* ctxt, char* name, char* ns_uri) - cdef xmlXPathContext* xmlXPathNewContext(tree.xmlDoc* doc) + cdef xmlXPathContext* xmlXPathNewContext(tree.xmlDoc* doc) nogil cdef xmlXPathObject* xmlXPathEvalExpression(char* str, - xmlXPathContext* ctxt) + xmlXPathContext* ctxt) nogil cdef xmlXPathObject* xmlXPathCompiledEval(xmlXPathCompExpr* 
comp, - xmlXPathContext* ctxt) - cdef xmlXPathCompExpr* xmlXPathCompile(char* str) + xmlXPathContext* ctxt) nogil + cdef xmlXPathCompExpr* xmlXPathCompile(char* str) nogil cdef xmlXPathCompExpr* xmlXPathCtxtCompile(xmlXPathContext* ctxt, - char* str) - cdef void xmlXPathFreeContext(xmlXPathContext* ctxt) - cdef void xmlXPathFreeCompExpr(xmlXPathCompExpr* comp) - cdef void xmlXPathFreeObject(xmlXPathObject* obj) + char* str) nogil + cdef void xmlXPathFreeContext(xmlXPathContext* ctxt) nogil + cdef void xmlXPathFreeCompExpr(xmlXPathCompExpr* comp) nogil + cdef void xmlXPathFreeObject(xmlXPathObject* obj) nogil cdef int xmlXPathRegisterNs(xmlXPathContext* ctxt, - char* prefix, char* ns_uri) + char* prefix, char* ns_uri) nogil - cdef xmlNodeSet* xmlXPathNodeSetCreate(tree.xmlNode* val) - cdef void xmlXPathFreeNodeSet(xmlNodeSet* val) + cdef xmlNodeSet* xmlXPathNodeSetCreate(tree.xmlNode* val) nogil + cdef void xmlXPathFreeNodeSet(xmlNodeSet* val) nogil cdef extern from "libxml/xpathInternals.h": cdef int xmlXPathRegisterFunc(xmlXPathContext* ctxt, char* name, - xmlXPathFunction f) + xmlXPathFunction f) nogil cdef int xmlXPathRegisterFuncNS(xmlXPathContext* ctxt, char* name, char* ns_uri, - xmlXPathFunction f) + xmlXPathFunction f) nogil cdef void xmlXPathRegisterFuncLookup(xmlXPathContext *ctxt, xmlXPathFuncLookupFunc f, - void *funcCtxt) + void *funcCtxt) nogil cdef int xmlXPathRegisterVariable(xmlXPathContext *ctxt, char* name, - xmlXPathObject* value) + xmlXPathObject* value) nogil cdef int xmlXPathRegisterVariableNS(xmlXPathContext *ctxt, char* name, char* ns_uri, - xmlXPathObject* value) - cdef void xmlXPathRegisteredVariablesCleanup(xmlXPathContext *ctxt) - cdef void xmlXPathRegisteredNsCleanup(xmlXPathContext *ctxt) - cdef xmlXPathObject* valuePop (xmlXPathParserContext *ctxt) - cdef int valuePush(xmlXPathParserContext* ctxt, xmlXPathObject *value) + xmlXPathObject* value) nogil + cdef void xmlXPathRegisteredVariablesCleanup(xmlXPathContext *ctxt) nogil + cdef 
void xmlXPathRegisteredNsCleanup(xmlXPathContext *ctxt) nogil + cdef xmlXPathObject* valuePop (xmlXPathParserContext *ctxt) nogil + cdef int valuePush(xmlXPathParserContext* ctxt, xmlXPathObject *value) nogil - cdef xmlXPathObject* xmlXPathNewCString(char *val) - cdef xmlXPathObject* xmlXPathWrapCString(char * val) - cdef xmlXPathObject* xmlXPathNewString(char *val) - cdef xmlXPathObject* xmlXPathWrapString(char * val) - cdef xmlXPathObject* xmlXPathNewFloat(double val) - cdef xmlXPathObject* xmlXPathNewBoolean(int val) - cdef xmlXPathObject* xmlXPathNewNodeSet(tree.xmlNode* val) - cdef xmlXPathObject* xmlXPathNewValueTree(tree.xmlNode* val) + cdef xmlXPathObject* xmlXPathNewCString(char *val) nogil + cdef xmlXPathObject* xmlXPathWrapCString(char * val) nogil + cdef xmlXPathObject* xmlXPathNewString(char *val) nogil + cdef xmlXPathObject* xmlXPathWrapString(char * val) nogil + cdef xmlXPathObject* xmlXPathNewFloat(double val) nogil + cdef xmlXPathObject* xmlXPathNewBoolean(int val) nogil + cdef xmlXPathObject* xmlXPathNewNodeSet(tree.xmlNode* val) nogil + cdef xmlXPathObject* xmlXPathNewValueTree(tree.xmlNode* val) nogil cdef void xmlXPathNodeSetAdd(xmlNodeSet* cur, - tree.xmlNode* val) + tree.xmlNode* val) nogil cdef void xmlXPathNodeSetAddUnique(xmlNodeSet* cur, - tree.xmlNode* val) - cdef xmlXPathObject* xmlXPathWrapNodeSet(xmlNodeSet* val) - cdef void xmlXPathErr(xmlXPathParserContext* ctxt, int error) + tree.xmlNode* val) nogil + cdef xmlXPathObject* xmlXPathWrapNodeSet(xmlNodeSet* val) nogil + cdef void xmlXPathErr(xmlXPathParserContext* ctxt, int error) nogil Modified: lxml/trunk/src/lxml/xpath.pxi ============================================================================== --- lxml/trunk/src/lxml/xpath.pxi (original) +++ lxml/trunk/src/lxml/xpath.pxi Sun Dec 2 14:45:01 2007 @@ -248,7 +248,6 @@ Absolute XPath expressions (starting with '/') will be evaluated against the ElementTree as returned by getroottree(). 
""" - cdef python.PyThreadState* state cdef xpath.xmlXPathObject* xpathObj cdef _Document doc cdef char* c_path @@ -261,10 +260,10 @@ try: self._context.register_context(doc) self._context.registerVariables(_variables) - state = python.PyEval_SaveThread() - xpathObj = xpath.xmlXPathEvalExpression( - _cstr(path), self._xpathCtxt) - python.PyEval_RestoreThread(state) + c_path = _cstr(path) + with nogil: + xpathObj = xpath.xmlXPathEvalExpression( + c_path, self._xpathCtxt) result = self._handle_result(xpathObj, doc) finally: self._error_log.disconnect() @@ -292,10 +291,10 @@ Variables may be provided as keyword arguments. Note that namespaces are currently not supported for variables. """ - cdef python.PyThreadState* state cdef xpath.xmlXPathObject* xpathObj cdef xmlDoc* c_doc cdef _Document doc + cdef char* c_path path = _utf8(_path) doc = self._element._doc @@ -306,12 +305,12 @@ c_doc = _fakeRootDoc(doc._c_doc, self._element._c_node) try: self._context.registerVariables(_variables) - state = python.PyEval_SaveThread() - self._xpathCtxt.doc = c_doc - self._xpathCtxt.node = tree.xmlDocGetRootElement(c_doc) - xpathObj = xpath.xmlXPathEvalExpression( - _cstr(path), self._xpathCtxt) - python.PyEval_RestoreThread(state) + c_path = _cstr(path) + with nogil: + self._xpathCtxt.doc = c_doc + self._xpathCtxt.node = tree.xmlDocGetRootElement(c_doc) + xpathObj = xpath.xmlXPathEvalExpression( + c_path, self._xpathCtxt) result = self._handle_result(xpathObj, doc) finally: _destroyFakeDoc(doc._c_doc, c_doc) @@ -370,7 +369,6 @@ self._raise_parse_error() def __call__(self, _etree_or_element, **_variables): - cdef python.PyThreadState* state cdef xpath.xmlXPathObject* xpathObj cdef _Document document cdef _Element element @@ -387,10 +385,9 @@ try: self._context.register_context(document) self._context.registerVariables(_variables) - state = python.PyEval_SaveThread() - xpathObj = xpath.xmlXPathCompiledEval( - self._xpath, self._xpathCtxt) - python.PyEval_RestoreThread(state) + with 
nogil: + xpathObj = xpath.xmlXPathCompiledEval( + self._xpath, self._xpathCtxt) result = self._handle_result(xpathObj, document) finally: self._error_log.disconnect() Modified: lxml/trunk/src/lxml/xslt.pxd ============================================================================== --- lxml/trunk/src/lxml/xslt.pxd (original) +++ lxml/trunk/src/lxml/xslt.pxd Sun Dec 2 14:45:01 2007 @@ -23,20 +23,21 @@ xmlDict* dict int profile - cdef xsltStylesheet* xsltParseStylesheetDoc(xmlDoc* doc) - cdef void xsltFreeStylesheet(xsltStylesheet* sheet) + cdef xsltStylesheet* xsltParseStylesheetDoc(xmlDoc* doc) nogil + cdef void xsltFreeStylesheet(xsltStylesheet* sheet) nogil cdef extern from "libxslt/extensions.h": cdef int xsltRegisterExtFunction(xsltTransformContext* ctxt, char* name, char* URI, - xmlXPathFunction function) + xmlXPathFunction function) nogil cdef int xsltRegisterExtModuleFunction(char* name, char* URI, - xmlXPathFunction function) + xmlXPathFunction function) nogil cdef int xsltUnregisterExtModuleFunction(char* name, char* URI) - cdef xmlXPathFunction xsltExtModuleFunctionLookup(char* name, char* URI) + cdef xmlXPathFunction xsltExtModuleFunctionLookup( + char* name, char* URI) nogil cdef int xsltRegisterExtPrefix(xsltStylesheet* style, - char* prefix, char* URI) + char* prefix, char* URI) nogil cdef extern from "libxslt/documents.h": ctypedef enum xsltLoadType: @@ -49,30 +50,30 @@ void* ctxt, xsltLoadType type) cdef xsltDocLoaderFunc xsltDocDefaultLoader - cdef void xsltSetLoaderFunc(xsltDocLoaderFunc f) + cdef void xsltSetLoaderFunc(xsltDocLoaderFunc f) nogil cdef extern from "libxslt/transform.h": cdef xmlDoc* xsltApplyStylesheet(xsltStylesheet* style, xmlDoc* doc, - char** params) + char** params) nogil cdef xmlDoc* xsltApplyStylesheetUser(xsltStylesheet* style, xmlDoc* doc, char** params, char* output, void* profile, - xsltTransformContext* context) + xsltTransformContext* context) nogil cdef xsltTransformContext* xsltNewTransformContext(xsltStylesheet* 
style, - xmlDoc* doc) - cdef void xsltFreeTransformContext(xsltTransformContext* context) + xmlDoc* doc) nogil + cdef void xsltFreeTransformContext(xsltTransformContext* context) nogil cdef extern from "libxslt/xsltutils.h": cdef int xsltSaveResultToString(char** doc_txt_ptr, int* doc_txt_len, xmlDoc* result, - xsltStylesheet* style) + xsltStylesheet* style) nogil - cdef void xsltSetGenericErrorFunc(void* ctxt, - void (*handler)(void* ctxt, char* msg, ...)) - cdef void xsltSetTransformErrorFunc(xsltTransformContext*, - void* ctxt, - void (*handler)(void* ctxt, char* msg, ...)) + cdef void xsltSetGenericErrorFunc( + void* ctxt, void (*handler)(void* ctxt, char* msg, ...)) nogil + cdef void xsltSetTransformErrorFunc( + xsltTransformContext*, void* ctxt, + void (*handler)(void* ctxt, char* msg, ...)) nogil cdef extern from "libxslt/security.h": ctypedef struct xsltSecurityPrefs @@ -87,20 +88,20 @@ xsltTransformContext* ctxt, char* value) - cdef xsltSecurityPrefs* xsltNewSecurityPrefs() - cdef void xsltFreeSecurityPrefs(xsltSecurityPrefs* sec) + cdef xsltSecurityPrefs* xsltNewSecurityPrefs() nogil + cdef void xsltFreeSecurityPrefs(xsltSecurityPrefs* sec) nogil cdef int xsltSecurityForbid(xsltSecurityPrefs* sec, xsltTransformContext* ctxt, - char* value) + char* value) nogil cdef int xsltSecurityAllow(xsltSecurityPrefs* sec, xsltTransformContext* ctxt, - char* value) + char* value) nogil cdef int xsltSetSecurityPrefs(xsltSecurityPrefs* sec, xsltSecurityOption option, - xsltSecurityCheck func) + xsltSecurityCheck func) nogil cdef int xsltSetCtxtSecurityPrefs(xsltSecurityPrefs* sec, - xsltTransformContext* ctxt) - cdef xmlDoc* xsltGetProfileInformation(xsltTransformContext* ctxt) + xsltTransformContext* ctxt) nogil + cdef xmlDoc* xsltGetProfileInformation(xsltTransformContext* ctxt) nogil cdef extern from "libxslt/extra.h": cdef char* XSLT_LIBXSLT_NAMESPACE @@ -109,7 +110,7 @@ cdef char* XSLT_XT_NAMESPACE cdef xmlXPathFunction xsltFunctionNodeSet - cdef void 
xsltRegisterAllExtras() + cdef void xsltRegisterAllExtras() nogil cdef extern from "libexslt/exslt.h": - cdef void exsltRegisterAll() + cdef void exsltRegisterAll() nogil Modified: lxml/trunk/src/lxml/xslt.pxi ============================================================================== --- lxml/trunk/src/lxml/xslt.pxi (original) +++ lxml/trunk/src/lxml/xslt.pxi Sun Dec 2 14:45:01 2007 @@ -271,7 +271,6 @@ def __init__(self, xslt_input, extensions=None, regexp=True, access_control=None): - cdef python.PyThreadState* state cdef xslt.xsltStylesheet* c_style cdef xmlDoc* c_doc cdef xmlDoc* fake_c_doc @@ -301,9 +300,8 @@ c_doc._private = self._xslt_resolver_context self._error_log.connect() - state = python.PyEval_SaveThread() - c_style = xslt.xsltParseStylesheetDoc(c_doc) - python.PyEval_RestoreThread(state) + with nogil: + c_style = xslt.xsltParseStylesheetDoc(c_doc) self._error_log.disconnect() if c_style is NULL: @@ -443,7 +441,6 @@ cdef xmlDoc* _run_transform(self, _Document input_doc, xmlDoc* c_input_doc, parameters, _XSLTContext context, xslt.xsltTransformContext* transform_ctxt): - cdef python.PyThreadState* state cdef xmlDoc* c_result cdef char** params cdef Py_ssize_t i, parameter_count @@ -480,10 +477,9 @@ else: params = NULL - state = python.PyEval_SaveThread() - c_result = xslt.xsltApplyStylesheetUser( - self._c_style, c_input_doc, params, NULL, NULL, transform_ctxt) - python.PyEval_RestoreThread(state) + with nogil: + c_result = xslt.xsltApplyStylesheetUser( + self._c_style, c_input_doc, params, NULL, NULL, transform_ctxt) if params is not NULL: # deallocate space for parameters @@ -499,7 +495,6 @@ cdef XSLT _xslt cdef _Document _profile cdef _saveToStringAndSize(self, char** s, int* l): - cdef python.PyThreadState* state cdef _Document doc cdef int r if self._context_node is not None: @@ -509,9 +504,9 @@ if doc is None: s[0] = NULL return - state = python.PyEval_SaveThread() - r = xslt.xsltSaveResultToString(s, l, doc._c_doc, self._xslt._c_style) - 
python.PyEval_RestoreThread(state) + with nogil: + r = xslt.xsltSaveResultToString(s, l, doc._c_doc, + self._xslt._c_style) if r == -1: raise XSLTSaveError, "Error saving XSLT result to string" From scoder at codespeak.net Wed Dec 5 19:30:47 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Wed, 5 Dec 2007 19:30:47 +0100 (CET) Subject: [Lxml-checkins] r49400 - in lxml/trunk: . src/lxml src/lxml/tests Message-ID: <20071205183047.C59DA1684CC@codespeak.net> Author: scoder Date: Wed Dec 5 19:30:47 2007 New Revision: 49400 Modified: lxml/trunk/TODO.txt lxml/trunk/selftest.py lxml/trunk/src/lxml/parser.pxi lxml/trunk/src/lxml/tests/test_elementtree.py lxml/trunk/src/lxml/xmlerror.pxi lxml/trunk/src/lxml/xslt.pxi Log: properties 'position' and 'code' on ParseError exceptions, small fixes to XSLT error reporting Modified: lxml/trunk/TODO.txt ============================================================================== --- lxml/trunk/TODO.txt (original) +++ lxml/trunk/TODO.txt Wed Dec 5 19:30:47 2007 @@ -54,6 +54,6 @@ * clean support for entities (is the Entity element class enough?) -* implement 'position' property on ParseError exception - -* rewrite iterparse() to accept a parser as argument instead of being one +* rewrite iterparse() to accept a parser as argument instead of being + one (or maybe not: iterparse() can't deal with all parser options + anyway). 
Modified: lxml/trunk/selftest.py ============================================================================== --- lxml/trunk/selftest.py (original) +++ lxml/trunk/selftest.py Wed Dec 5 19:30:47 2007 @@ -653,7 +653,7 @@ except ET.ParseError: return sys.exc_value -# doesn't work with lxml.etree +# doesn't work with lxml.etree -> different positions del error def namespace(): Modified: lxml/trunk/src/lxml/parser.pxi ============================================================================== --- lxml/trunk/src/lxml/parser.pxi (original) +++ lxml/trunk/src/lxml/parser.pxi Wed Dec 5 19:30:47 2007 @@ -13,7 +13,10 @@ class XMLSyntaxError(ParseError): """Syntax error while parsing an XML document. """ - pass + def __init__(self, message, code, line, column): + ParseError.__init__(self, message) + self.position = (line, column) + self.code = code class ParserError(LxmlError): """Internal lxml parser error. @@ -449,7 +452,6 @@ context._initParserContext(c_ctxt) context._error_log = _ErrorLog() - cdef int _raiseParseError(xmlparser.xmlParserCtxt* ctxt, filename, _ErrorLog error_log) except 0: if filename is not None and \ @@ -458,18 +460,21 @@ message = "Error reading file '%s': %s" % ( filename, (ctxt.lastError.message).strip()) else: - message = "Error reading file '%s'" % filename - raise IOError, message + message = "Error reading '%s'" % filename + raise IOError(message) elif error_log: - raise XMLSyntaxError, error_log._buildExceptionMessage( - "Document is not well formed") + raise error_log._buildParseException( + XMLSyntaxError, "Document is not well formed") elif ctxt.lastError.message is not NULL: message = (ctxt.lastError.message).strip() + code = ctxt.lastError.code + line = ctxt.lastError.line + column = ctxt.lastError.int2 if ctxt.lastError.line > 0: - message = "line %d: %s" % (ctxt.lastError.line, message) - raise XMLSyntaxError, message + message = "line %d: %s" % (line, message) + raise XMLSyntaxError(message, code, line, column) else: - raise 
XMLSyntaxError + raise XMLSyntaxError(None, xmlerror.XML_ERR_INTERNAL_ERROR, 0, 0) cdef xmlDoc* _handleParseResult(_ParserContext context, xmlparser.xmlParserCtxt* c_ctxt, @@ -931,7 +936,8 @@ cdef xmlDoc* c_doc cdef _Document doc if not self._feed_parser_running: - raise XMLSyntaxError, "no element found" + raise XMLSyntaxError("no element found", + xmlerror.XML_ERR_INTERNAL_ERROR, 0, 0) context = self._getPushParserContext() pctxt = context._c_ctxt Modified: lxml/trunk/src/lxml/tests/test_elementtree.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_elementtree.py (original) +++ lxml/trunk/src/lxml/tests/test_elementtree.py Wed Dec 5 19:30:47 2007 @@ -3008,6 +3008,17 @@ self.assertRaises(ParseError, parser.close) + def test_feed_parser_error_position(self): + ParseError = self.etree.ParseError + parser = self.etree.XMLParser() + try: + parser.close() + except ParseError, e: + self.assertNotEquals(None, e.code) + self.assertNotEquals(0, e.code) + self.assert_(isinstance(e.position, tuple)) + self.assert_(e.position >= (0, 0)) + # parser target interface def test_parser_target_tag(self): Modified: lxml/trunk/src/lxml/xmlerror.pxi ============================================================================== --- lxml/trunk/src/lxml/xmlerror.pxi (original) +++ lxml/trunk/src/lxml/xmlerror.pxi Wed Dec 5 19:30:47 2007 @@ -128,6 +128,28 @@ if is_error: self.last_error = entry + cdef _buildParseException(self, exctype, default_message): + code = xmlerror.XML_ERR_INTERNAL_ERROR + if self._first_error is None: + return exctype(default_message, code, 0, 0) + if self._first_error is None or \ + self._first_error.message is None or \ + not self._first_error.message: + message = default_message + line = 0 + column = 0 + else: + message = self._first_error.message + code = self._first_error.type + line = self._first_error.line + column = self._first_error.column + if line > 0: + if column > 0: + message = "%s, 
line %d, column %d" % (message, line, column) + else: + message = "%s, line %d" % (message, line) + return exctype(message, code, line, column) + cdef _buildExceptionMessage(self, default_message): if self._first_error is None: return default_message @@ -393,7 +415,9 @@ cdef char* c_message cdef char* c_element cdef int i, text_size, element_size - if __DEBUG == 0 or msg is NULL or msg[0] == c'\n': + if __DEBUG == 0 or msg is NULL: + return + if msg[0] == c'\n' or msg[0] == c'\0': return cstd.va_start(args, msg) @@ -401,18 +425,19 @@ c_text = cstd.va_charptr(args) else: c_text = NULL - if cstd.strstr(msg, 'file %s') is not NULL: + if cstd.strstr(msg, 'file %s'): c_error.file = cstd.va_charptr(args) - if c_error.file is not NULL and cstd.strlen(c_error.file) > 0: - if cstd.strncmp(c_error.file, 'XSLT:', 5) == 0: - c_error.file = '' + if c_error.file and \ + cstd.strncmp(c_error.file, + 'string://__STRING__XSLT', 23) == 0: + c_error.file = '' else: c_error.file = NULL - if cstd.strstr(msg, 'line %d') is not NULL: + if cstd.strstr(msg, 'line %d'): c_error.line = cstd.va_int(args) else: c_error.line = -1 - if cstd.strstr(msg, 'element %s') is not NULL: + if cstd.strstr(msg, 'element %s'): c_element = cstd.va_charptr(args) else: c_element = NULL @@ -420,7 +445,17 @@ c_message = NULL if c_text is NULL: - c_error.message = '' + if c_element is not NULL and \ + cstd.strchr(msg, c'%') == cstd.strrchr(msg, c'%'): + # special case: a single occurrence of 'element %s' + text_size = cstd.strlen(msg) + element_size = cstd.strlen(c_element) + c_message = cstd.malloc( + (text_size + element_size + 1) * sizeof(char)) + cstd.sprintf(c_message, msg, c_element) + c_error.message = c_message + else: + c_error.message = '' elif c_element is NULL: c_error.message = c_text else: @@ -439,8 +474,7 @@ _forwardError(c_log_handler, &c_error) if c_message is not NULL: - cstd.free(c_error.message) - + cstd.free(c_message) 
################################################################################ ## CONSTANTS FROM "xmlerror.h" (or rather libxml-xmlerror.html) Modified: lxml/trunk/src/lxml/xslt.pxi ============================================================================== --- lxml/trunk/src/lxml/xslt.pxi (original) +++ lxml/trunk/src/lxml/xslt.pxi Wed Dec 5 19:30:47 2007 @@ -273,10 +273,8 @@ access_control=None): cdef xslt.xsltStylesheet* c_style cdef xmlDoc* c_doc - cdef xmlDoc* fake_c_doc cdef _Document doc cdef _Element root_node - cdef _ExsltRegExp _regexp doc = _documentOrRaise(xslt_input) root_node = _rootNodeOrRaise(xslt_input) @@ -308,10 +306,13 @@ tree.xmlFreeDoc(c_doc) self._xslt_resolver_context._raise_if_stored() # last error seems to be the most accurate here - if self._error_log.last_error is not None: - raise XSLTParseError, self._error_log.last_error.message + if self._error_log.last_error is not None and \ + self._error_log.last_error.message: + raise XSLTParseError(self._error_log.last_error.message) else: - raise XSLTParseError, "Cannot parse stylesheet" + raise XSLTParseError( + self._error_log._buildExceptionMessage( + "Cannot parse stylesheet")) c_doc._private = NULL # no longer used! 
self._c_style = c_style From scoder at codespeak.net Wed Dec 5 19:31:25 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Wed, 5 Dec 2007 19:31:25 +0100 (CET) Subject: [Lxml-checkins] r49401 - lxml/trunk/src/lxml/tests Message-ID: <20071205183125.8DD901684CC@codespeak.net> Author: scoder Date: Wed Dec 5 19:31:25 2007 New Revision: 49401 Modified: lxml/trunk/src/lxml/tests/test_xslt.py Log: additional XSLT doc resolver test case Modified: lxml/trunk/src/lxml/tests/test_xslt.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_xslt.py (original) +++ lxml/trunk/src/lxml/tests/test_xslt.py Wed Dec 5 19:31:25 2007 @@ -622,7 +622,6 @@ '{http://www.w3.org/1999/XSL/Transform}stylesheet') def test_xslt_document_error(self): - # make sure document('') works from parsed strings xslt = etree.XSLT(etree.XML("""\ @@ -633,6 +632,68 @@ """)) self.assertRaises(etree.XSLTApplyError, xslt, etree.XML('')) + def test_xslt_document_XML_resolver(self): + # make sure document('') works when custom resolvers are in use + assertEquals = self.assertEquals + called = {'count' : 0} + class TestResolver(etree.Resolver): + def resolve(self, url, id, context): + assertEquals(url, 'file://ANYTHING') + called['count'] += 1 + return self.resolve_string('', context) + + parser = etree.XMLParser() + parser.resolvers.add(TestResolver()) + + xslt = etree.XSLT(etree.XML("""\ + + + + + + + + + + + + + + + A + B + + +""", parser)) + + self.assertEquals(called['count'], 0) + result = xslt(etree.XML('')) + self.assertEquals(called['count'], 1) + + root = result.getroot() + self.assertEquals(root.tag, + 'test') + self.assertEquals(len(root), 4) + + self.assertEquals(root[0].tag, + 'CALLED') + self.assertEquals(root[1].tag, + '{local}entry') + self.assertEquals(root[1].text, + None) + self.assertEquals(root[1].get("value"), + 'A') + self.assertEquals(root[2].tag, + 'CALLED') + self.assertEquals(root[3].tag, + '{local}entry') + 
self.assertEquals(root[3].text, + None) + self.assertEquals(root[3].get("value"), + 'B') + def test_xslt_move_result(self): root = etree.XML('''\ From scoder at codespeak.net Wed Dec 5 19:31:51 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Wed, 5 Dec 2007 19:31:51 +0100 (CET) Subject: [Lxml-checkins] r49402 - lxml/trunk/src/lxml Message-ID: <20071205183151.8060D1684CD@codespeak.net> Author: scoder Date: Wed Dec 5 19:31:51 2007 New Revision: 49402 Modified: lxml/trunk/src/lxml/parsertarget.pxi Log: fix: memory leak in target parser Modified: lxml/trunk/src/lxml/parsertarget.pxi ============================================================================== --- lxml/trunk/src/lxml/parsertarget.pxi (original) +++ lxml/trunk/src/lxml/parsertarget.pxi Wed Dec 5 19:31:51 2007 @@ -115,5 +115,14 @@ cdef xmlDoc* _handleParseResultDoc(self, _BaseParser parser, xmlDoc* result, filename) except NULL: + if result is not NULL and result._private is NULL: + # no _Document proxy => orphen + tree.xmlFreeDoc(result) + if self._c_ctxt.myDoc is not NULL and \ + self._c_ctxt.myDoc is not result and \ + self._c_ctxt.myDoc._private is NULL: + # no _Document proxy => orphen + tree.xmlFreeDoc(self._c_ctxt.myDoc) + self._c_ctxt.myDoc = NULL self._raise_if_stored() raise _TargetParserResult(self._python_target.close()) From scoder at codespeak.net Wed Dec 5 19:32:16 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Wed, 5 Dec 2007 19:32:16 +0100 (CET) Subject: [Lxml-checkins] r49403 - lxml/trunk Message-ID: <20071205183216.9A91D1684CE@codespeak.net> Author: scoder Date: Wed Dec 5 19:32:16 2007 New Revision: 49403 Modified: lxml/trunk/CHANGES.txt Log: changelog Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Wed Dec 5 19:32:16 2007 @@ -2,6 +2,23 @@ lxml changelog ============== +Under development +================= + +Features 
added +-------------- + +* New properties ``position`` and ``code`` on ParseError exception (as + in ET 1.3) + +Bugs fixed +---------- + +* Minor bugs in XSLT error message formatting. + +* Result document memory leak in target parser. + + 2.0alpha5 (2007-11-24) ====================== From scoder at codespeak.net Wed Dec 5 19:33:05 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Wed, 5 Dec 2007 19:33:05 +0100 (CET) Subject: [Lxml-checkins] r49404 - lxml/trunk/src/lxml Message-ID: <20071205183305.B15B71684CE@codespeak.net> Author: scoder Date: Wed Dec 5 19:33:05 2007 New Revision: 49404 Modified: lxml/trunk/src/lxml/schematron.pxi Log: cleanup in schematron code Modified: lxml/trunk/src/lxml/schematron.pxi ============================================================================== --- lxml/trunk/src/lxml/schematron.pxi (original) +++ lxml/trunk/src/lxml/schematron.pxi Wed Dec 5 19:33:05 2007 @@ -81,8 +81,8 @@ cdef char* c_href cdef schematron.xmlSchematronParserCtxt* parser_ctxt if not config.ENABLE_SCHEMATRON: - raise SchematronError, \ - "lxml.etree was compiled without Schematron support." 
+ raise SchematronError( + "lxml.etree was compiled without Schematron support.") self._c_schema = NULL if etree is not None: doc = _documentOrRaise(etree) @@ -98,7 +98,7 @@ parser_ctxt = schematron.xmlSchematronNewParserCtxt(_cstr(filename)) c_doc = NULL else: - raise SchematronParseError, "No tree or file given" + raise SchematronParseError("No tree or file given") if parser_ctxt is NULL: python.PyErr_NoMemory() @@ -107,7 +107,8 @@ schematron.xmlSchematronFreeParserCtxt(parser_ctxt) if self._c_schema is NULL: - raise SchematronParseError, "Document is not a valid Schematron schema" + raise SchematronParseError( + "Document is not a valid Schematron schema") _Validator.__init__(self) def __dealloc__(self): @@ -127,27 +128,28 @@ doc = _documentOrRaise(etree) root_node = _rootNodeOrRaise(etree) - self._error_log.connect() options = schematron.XML_SCHEMATRON_OUT_QUIET - if tree.LIBXML_VERSION <= 20629: - # hack to switch off stderr output - options = options | schematron.XML_SCHEMATRON_OUT_XML + #if tree.LIBXML_VERSION <= 20630: # ... and later? 
+ # hack to switch off stderr output + options = options | schematron.XML_SCHEMATRON_OUT_XML + valid_ctxt = schematron.xmlSchematronNewValidCtxt( self._c_schema, options) if valid_ctxt is NULL: - self._error_log.disconnect() - raise SchematronError, "Failed to create validation context" + raise SchematronError("Failed to create validation context") + self._error_log.connect() c_doc = _fakeRootDoc(doc._c_doc, root_node._c_node) with nogil: ret = schematron.xmlSchematronValidateDoc(valid_ctxt, c_doc) _destroyFakeDoc(doc._c_doc, c_doc) + self._error_log.disconnect() schematron.xmlSchematronFreeValidCtxt(valid_ctxt) - self._error_log.disconnect() if ret == -1: - raise SchematronValidateError, "Internal error in Schematron validation" + raise SchematronValidateError( + "Internal error in Schematron validation") if ret == 0: return True else: From scoder at codespeak.net Sat Dec 8 16:06:43 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sat, 8 Dec 2007 16:06:43 +0100 (CET) Subject: [Lxml-checkins] r49552 - lxml/trunk/src/lxml Message-ID: <20071208150643.8BB311684C2@codespeak.net> Author: scoder Date: Sat Dec 8 16:06:42 2007 New Revision: 49552 Modified: lxml/trunk/src/lxml/xslt.pxi Log: Modified: lxml/trunk/src/lxml/xslt.pxi ============================================================================== --- lxml/trunk/src/lxml/xslt.pxi (original) +++ lxml/trunk/src/lxml/xslt.pxi Sat Dec 8 16:06:42 2007 @@ -316,7 +316,6 @@ c_doc._private = NULL # no longer used! self._c_style = c_style - self._context = _XSLTContext(None, extensions, regexp) def __dealloc__(self): From scoder at codespeak.net Sat Dec 8 16:06:46 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sat, 8 Dec 2007 16:06:46 +0100 (CET) Subject: [Lxml-checkins] r49553 - in lxml/trunk: . 
doc src/lxml Message-ID: <20071208150646.1E6831684C1@codespeak.net> Author: scoder Date: Sat Dec 8 16:06:45 2007 New Revision: 49553 Modified: lxml/trunk/CHANGES.txt lxml/trunk/doc/extensions.txt lxml/trunk/src/lxml/extensions.pxi Log: Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Sat Dec 8 16:06:45 2007 @@ -18,6 +18,12 @@ * Result document memory leak in target parser. +Other changes +------------- + +* Second argument to ``lxml.etree.Extension()`` helper is no longer + required, third argument is now a keyword-only argument ``ns``. + 2.0alpha5 (2007-11-24) ====================== Modified: lxml/trunk/doc/extensions.txt ============================================================================== --- lxml/trunk/doc/extensions.txt (original) +++ lxml/trunk/doc/extensions.txt Sat Dec 8 16:06:45 2007 @@ -217,20 +217,21 @@ >>> ext_module = MyExt() >>> functions = ('function1', 'function2') - >>> extensions = etree.Extension( ext_module, functions, 'local-ns' ) + >>> extensions = etree.Extension( ext_module, functions, ns='local-ns' ) >>> e = etree.XPathEvaluator(doc, namespaces=namespaces, extensions=extensions) >>> print e.evaluate('l:function1(string(b))') 1Haegar -The second argument to ``Extension`` can either be be a sequence of names to -select from the module, a dictionary that explicitly maps function names to -their XPath alter-ego or ``None`` (explicitly passed) to take all available -functions under their original name (if their name does not start with '_'). - -The third argument takes a namespace URI or ``None`` (also if left out) for -the default namespace. 
The following examples will therefore all do the same -thing:: +The optional second argument to ``Extension`` can either be be a +sequence of names to select from the module, a dictionary that +explicitly maps function names to their XPath alter-ego or ``None`` +(explicitly passed) to take all available functions under their +original name (if their name does not start with '_'). + +The additional ``ns`` keyword argument takes a namespace URI or +``None`` (also if left out) for the default namespace. The following +examples will therefore all do the same thing:: >>> functions = ('function1', 'function2', 'function3') >>> extensions = etree.Extension( ext_module, functions ) @@ -238,12 +239,12 @@ >>> print e.evaluate('function1(function2(function3(string(b))))') 123Haegar - >>> extensions = etree.Extension( ext_module, functions, None ) + >>> extensions = etree.Extension( ext_module, functions, ns=None ) >>> e = etree.XPathEvaluator(doc, extensions=extensions) >>> print e.evaluate('function1(function2(function3(string(b))))') 123Haegar - >>> extensions = etree.Extension( ext_module, None ) + >>> extensions = etree.Extension(ext_module) >>> e = etree.XPathEvaluator(doc, extensions=extensions) >>> print e.evaluate('function1(function2(function3(string(b))))') 123Haegar @@ -253,15 +254,15 @@ ... 'function2' : 'function2', ... 'function3' : 'function3' ... 
} - >>> extensions = etree.Extension( ext_module, functions ) + >>> extensions = etree.Extension(ext_module, functions) >>> e = etree.XPathEvaluator(doc, extensions=extensions) >>> print e.evaluate('function1(function2(function3(string(b))))') 123Haegar For convenience, you can also pass a sequence of extensions:: - >>> extensions1 = etree.Extension( ext_module, None ) - >>> extensions2 = etree.Extension( ext_module, None, 'local-ns' ) + >>> extensions1 = etree.Extension(ext_module) + >>> extensions2 = etree.Extension(ext_module, ns='local-ns') >>> e = etree.XPathEvaluator(doc, extensions=[extensions1, extensions2], ... namespaces=namespaces) >>> print e.evaluate('function1(l:function2(function3(string(b))))') Modified: lxml/trunk/src/lxml/extensions.pxi ============================================================================== --- lxml/trunk/src/lxml/extensions.pxi (original) +++ lxml/trunk/src/lxml/extensions.pxi Sat Dec 8 16:06:45 2007 @@ -323,8 +323,17 @@ #print "Holding document:", element._doc._c_doc self._temp_refs.add((<_Element>o)._doc) +def Extension(module, function_mapping=None, *, ns=None): + """Build a dictionary of extension functions from the functions + defined in a module or the methods of an object. + + As second argument, you can pass an additional mapping of + attribute names to XPath function names, or a list of function + names that should be taken. -def Extension(module, function_mapping, ns=None): + The ``ns`` keyword argument accepts a namespace URI for the XPath + functions. 
+ """ functions = {} if python.PyDict_Check(function_mapping): for function_name, xpath_name in function_mapping.items(): @@ -332,16 +341,13 @@ getattr(module, function_name)) else: if function_mapping is None: - function_mapping = [] - for name in dir(module): - if not name.startswith('_'): - python.PyList_Append(function_mapping, name) + function_mapping = [ name for name in dir(module) + if not name.startswith('_') ] for function_name in function_mapping: python.PyDict_SetItem(functions, (ns, function_name), getattr(module, function_name)) return functions - ################################################################################ # EXSLT regexp implementation From scoder at codespeak.net Sat Dec 8 16:06:50 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sat, 8 Dec 2007 16:06:50 +0100 (CET) Subject: [Lxml-checkins] r49554 - lxml/trunk/src/lxml Message-ID: <20071208150650.4970C16851F@codespeak.net> Author: scoder Date: Sat Dec 8 16:06:49 2007 New Revision: 49554 Modified: lxml/trunk/src/lxml/extensions.pxi Log: Modified: lxml/trunk/src/lxml/extensions.pxi ============================================================================== --- lxml/trunk/src/lxml/extensions.pxi (original) +++ lxml/trunk/src/lxml/extensions.pxi Sat Dec 8 16:06:49 2007 @@ -81,12 +81,14 @@ ns_uri_utf = self._to_utf(ns_uri) python.PyList_Append(ns, (prefix_utf, ns_uri_utf)) namespaces = ns + else: + namespaces = None self._doc = None self._exc = _ExceptionContext() self._extensions = extensions self._namespaces = namespaces - self._temp_refs = _TempStore() + self._temp_refs = _TempStore() if enable_regexp: _regexp = _ExsltRegExp() From scoder at codespeak.net Sat Dec 8 16:06:53 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sat, 8 Dec 2007 16:06:53 +0100 (CET) Subject: [Lxml-checkins] r49555 - lxml/trunk/src/lxml Message-ID: <20071208150653.D3D1C168523@codespeak.net> Author: scoder Date: Sat Dec 8 16:06:53 2007 New Revision: 49555 Modified: 
lxml/trunk/src/lxml/lxml.etree.pyx Log: Modified: lxml/trunk/src/lxml/lxml.etree.pyx ============================================================================== --- lxml/trunk/src/lxml/lxml.etree.pyx (original) +++ lxml/trunk/src/lxml/lxml.etree.pyx Sat Dec 8 16:06:53 2007 @@ -1746,8 +1746,12 @@ tree.xmlFree(c_result) return 1 - def __richcmp__(self, other, int op): - return python.PyObject_RichCompare(dict(self), other, op) + def __richcmp__(one, other, int op): + if not python.PyDict_Check(one): + one = dict(one) + if not python.PyDict_Check(other): + other = dict(other) + return python.PyObject_RichCompare(one, other, op) cdef class _AttribIterator: """Attribute iterator - for internal use only! From scoder at codespeak.net Sat Dec 8 16:06:55 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sat, 8 Dec 2007 16:06:55 +0100 (CET) Subject: [Lxml-checkins] r49556 - lxml/trunk/src/lxml Message-ID: <20071208150655.06DD3168541@codespeak.net> Author: scoder Date: Sat Dec 8 16:06:55 2007 New Revision: 49556 Modified: lxml/trunk/src/lxml/lxml.etree.pyx Log: Modified: lxml/trunk/src/lxml/lxml.etree.pyx ============================================================================== --- lxml/trunk/src/lxml/lxml.etree.pyx (original) +++ lxml/trunk/src/lxml/lxml.etree.pyx Sat Dec 8 16:06:55 2007 @@ -215,6 +215,14 @@ cdef class QName: """QName wrapper. + + Pass a tag name by itself or a namespace URI and a tag name to + create a qualified name. The ``text`` property holds the + qualified name in ``{namespace}tagname`` notation. + + You can pass QName objects wherever a tag name is expected. Also, + setting Element text from a QName will resolve the namespace + prefix and set a qualified text value. 
""" cdef readonly object text def __init__(self, text_or_uri, tag=None): From scoder at codespeak.net Sat Dec 8 16:06:59 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sat, 8 Dec 2007 16:06:59 +0100 (CET) Subject: [Lxml-checkins] r49557 - lxml/trunk/src/lxml Message-ID: <20071208150659.57AEA1684C2@codespeak.net> Author: scoder Date: Sat Dec 8 16:06:58 2007 New Revision: 49557 Modified: lxml/trunk/src/lxml/parser.pxi Log: Modified: lxml/trunk/src/lxml/parser.pxi ============================================================================== --- lxml/trunk/src/lxml/parser.pxi (original) +++ lxml/trunk/src/lxml/parser.pxi Sat Dec 8 16:06:58 2007 @@ -1284,6 +1284,10 @@ ############################################################ cdef class TreeBuilder(_SaxParserTarget): + """Parser target that builds a tree. + + The final tree is returned by the ``close()`` method. + """ cdef _BaseParser _parser cdef object _factory cdef object _data From scoder at codespeak.net Sat Dec 8 16:07:02 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sat, 8 Dec 2007 16:07:02 +0100 (CET) Subject: [Lxml-checkins] r49558 - lxml/trunk/doc Message-ID: <20071208150702.618C01684D7@codespeak.net> Author: scoder Date: Sat Dec 8 16:07:01 2007 New Revision: 49558 Modified: lxml/trunk/doc/compatibility.txt Log: Modified: lxml/trunk/doc/compatibility.txt ============================================================================== --- lxml/trunk/doc/compatibility.txt (original) +++ lxml/trunk/doc/compatibility.txt Sat Dec 8 16:07:01 2007 @@ -3,10 +3,10 @@ ============================= A lot of care has been taken to ensure compatibility between etree and -ElementTree. Nonetheless some differences and incompatibilities exist: +ElementTree. 
Nonetheless, some differences and incompatibilities exist: -* Importing etree is obviously different; etree uses a lower case - package name, while ElementTree a combination of upper-case and +* Importing etree is obviously different; etree uses a lower-case + package name, while ElementTree uses a combination of upper-case and lower case in imports:: # etree @@ -89,9 +89,10 @@ API. In general, etree tries to avoid AssertionErrors in favour of being more specific about the reason for the exception. -* When parsing fails in ``iterparse()``, ElementTree raises a low-level - ExpatError instead of a SyntaxError as the other parsers. lxml.etree - follows the other parts of the parser API and raises an (XML)SyntaxError. +* When parsing fails in ``iterparse()``, ElementTree up to version + 1.2.x raises a low-level ``ExpatError`` instead of a ``SyntaxError`` + as the other parsers. Both lxml and ElementTree 1.3 raise a + ``ParseError`` for parser errors. * The ``iterparse()`` function in lxml is implemented based on the libxml2 parser and tree generator. This means that modifications of the document @@ -115,8 +116,17 @@ tries to provide a default setup that is as close to the ElementTree parser as possible. -* ElementTree has a bug when serializing an empty Comment (no text argument - given) to XML, etree serializes this successfully. +* The ``TreeBuilder`` class of ``lxml.etree`` uses a different + signature for the ``start()`` method. It accepts an additional + argument ``nsmap`` to propagate the namespace declarations of an + element in addition to its own namespace. To assure compatibility + with ElementTree (which does not support this argument), lxml checks + if the method accepts 3 arguments before calling it, and otherwise + drops the namespace mapping. This should work with most existing + ElementTree code, although there may still be conflicting cases. 
+ +* ElementTree 1.2 has a bug when serializing an empty Comment (no text + argument given) to XML, etree serializes this successfully. * ElementTree adds whitespace around comments on serialization, lxml does not. This means that a comment text "text" that ElementTree serializes as From scoder at codespeak.net Sat Dec 8 16:07:05 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sat, 8 Dec 2007 16:07:05 +0100 (CET) Subject: [Lxml-checkins] r49559 - lxml/trunk Message-ID: <20071208150705.5BCED168542@codespeak.net> Author: scoder Date: Sat Dec 8 16:07:04 2007 New Revision: 49559 Modified: lxml/trunk/INSTALL.txt Log: Modified: lxml/trunk/INSTALL.txt ============================================================================== --- lxml/trunk/INSTALL.txt (original) +++ lxml/trunk/INSTALL.txt Sat Dec 8 16:07:04 2007 @@ -34,7 +34,7 @@ This has been reported to work on Linux, MacOS-X 10.4 and Windows, as long as libxml2 and libxslt are properly installed (including development packages, -i.e. header files etc.). +i.e. header files, etc.). Building lxml from sources @@ -84,3 +84,5 @@ ``DYLD_LIBRARY_PATH`` to the directory where fink keeps the libraries. .. _fink: http://finkproject.org/ + +A MacPort of lxml is available. Try ``port install py25-lxml``. From lxml-checkins at codespeak.net Wed Dec 12 03:39:24 2007 From: lxml-checkins at codespeak.net (VIAGRA ® Official Site) Date: Wed, 12 Dec 2007 03:39:24 +0100 (CET) Subject: [Lxml-checkins] December 73% OFF Message-ID: <20071212163926.6712.qmail@mx-ll-58.147.38-21.tttmaxnet.com> An HTML attachment was scrubbed... 
URL: http://codespeak.net/pipermail/lxml-checkins/attachments/20071212/ddc196c8/attachment.htm From lxml-checkins at codespeak.net Thu Dec 13 19:33:43 2007 From: lxml-checkins at codespeak.net (VIAGRA ® Official Site) Date: Thu, 13 Dec 2007 19:33:43 +0100 (CET) Subject: [Lxml-checkins] December 79% OFF Message-ID: <20071213-43344.4862.qmail@client-201.240.53.123.speedy.net.pe> An HTML attachment was scrubbed... URL: http://codespeak.net/pipermail/lxml-checkins/attachments/20071213/df75b6ea/attachment.htm From scoder at codespeak.net Fri Dec 14 08:42:38 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 14 Dec 2007 08:42:38 +0100 (CET) Subject: [Lxml-checkins] r49760 - in lxml/trunk: . src/lxml Message-ID: <20071214074238.4049216851B@codespeak.net> Author: scoder Date: Fri Dec 14 08:42:36 2007 New Revision: 49760 Modified: lxml/trunk/ (props changed) lxml/trunk/src/lxml/lxml.etree.pyx Log: r3087 at delle: sbehnel | 2007-12-09 17:54:38 +0100 cleanup Modified: lxml/trunk/src/lxml/lxml.etree.pyx ============================================================================== --- lxml/trunk/src/lxml/lxml.etree.pyx (original) +++ lxml/trunk/src/lxml/lxml.etree.pyx Fri Dec 14 08:42:36 2007 @@ -639,7 +639,7 @@ if self._c_node.parent != NULL and not _isElement(self._c_node.parent): if element._c_node.type != tree.XML_PI_NODE: if element._c_node.type != tree.XML_COMMENT_NODE: - raise TypeError, "Only processing instructions and comments can be siblings of the root element" + raise TypeError("Only processing instructions and comments can be siblings of the root element") element.tail = None _appendSibling(self, element) @@ -654,7 +654,7 @@ if self._c_node.parent != NULL and not _isElement(self._c_node.parent): if element._c_node.type != tree.XML_PI_NODE: if element._c_node.type != tree.XML_COMMENT_NODE: - raise TypeError, "Only processing instructions and comments can be siblings of the root element" + raise TypeError("Only processing instructions and 
comments can be siblings of the root element") element.tail = None _prependSibling(self, element) From scoder at codespeak.net Fri Dec 14 08:42:43 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 14 Dec 2007 08:42:43 +0100 (CET) Subject: [Lxml-checkins] r49761 - in lxml/trunk: . src/lxml src/lxml/tests Message-ID: <20071214074243.9F74D16851D@codespeak.net> Author: scoder Date: Fri Dec 14 08:42:43 2007 New Revision: 49761 Modified: lxml/trunk/ (props changed) lxml/trunk/src/lxml/serializer.pxi lxml/trunk/src/lxml/tests/test_etree.py lxml/trunk/src/lxml/tests/test_sax.py Log: r3088 at delle: sbehnel | 2007-12-09 17:56:04 +0100 on serialisation: add new lines after root-level processing instructions and comments Modified: lxml/trunk/src/lxml/serializer.pxi ============================================================================== --- lxml/trunk/src/lxml/serializer.pxi (original) +++ lxml/trunk/src/lxml/serializer.pxi Fri Dec 14 08:42:43 2007 @@ -236,12 +236,13 @@ # we are at a root node, so add PI and comment siblings c_sibling = c_node while c_sibling.prev != NULL and \ - (c_sibling.prev.type == tree.XML_PI_NODE or \ - c_sibling.prev.type == tree.XML_COMMENT_NODE): + (c_sibling.prev.type == tree.XML_PI_NODE or \ + c_sibling.prev.type == tree.XML_COMMENT_NODE): c_sibling = c_sibling.prev while c_sibling != c_node: tree.xmlNodeDumpOutput(c_buffer, c_node.doc, c_sibling, 0, pretty_print, encoding) + tree.xmlOutputBufferWriteString(c_buffer, "\n") c_sibling = c_sibling.next cdef void _writeNextSiblings(tree.xmlOutputBuffer* c_buffer, xmlNode* c_node, @@ -252,8 +253,9 @@ # we are at a root node, so add PI and comment siblings c_sibling = c_node.next while c_sibling != NULL and \ - (c_sibling.type == tree.XML_PI_NODE or \ - c_sibling.type == tree.XML_COMMENT_NODE): + (c_sibling.type == tree.XML_PI_NODE or \ + c_sibling.type == tree.XML_COMMENT_NODE): + tree.xmlOutputBufferWriteString(c_buffer, "\n") tree.xmlNodeDumpOutput(c_buffer, c_node.doc, 
c_sibling, 0, pretty_print, encoding) c_sibling = c_sibling.next Modified: lxml/trunk/src/lxml/tests/test_etree.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_etree.py (original) +++ lxml/trunk/src/lxml/tests/test_etree.py Fri Dec 14 08:42:43 2007 @@ -284,7 +284,7 @@ tostring = self.etree.tostring XMLParser = self.etree.XMLParser - xml = '' + xml = '\n\n' f = StringIO(xml) tree = parse(f) Modified: lxml/trunk/src/lxml/tests/test_sax.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_sax.py (original) +++ lxml/trunk/src/lxml/tests/test_sax.py Fri Dec 14 08:42:43 2007 @@ -44,9 +44,9 @@ xml_out) def test_etree_sax_pi_root(self): - tree = self.parse('ab') + tree = self.parse('\nab') xml_out = self._saxify_serialize(tree) - self.assertEquals('ab', + self.assertEquals('\nab', xml_out) def test_etree_sax_attributes(self): From scoder at codespeak.net Fri Dec 14 08:42:53 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 14 Dec 2007 08:42:53 +0100 (CET) Subject: [Lxml-checkins] r49762 - in lxml/trunk: . 
doc src/lxml src/lxml/tests Message-ID: <20071214074253.F408416851B@codespeak.net> Author: scoder Date: Fri Dec 14 08:42:53 2007 New Revision: 49762 Modified: lxml/trunk/ (props changed) lxml/trunk/CHANGES.txt lxml/trunk/doc/api.txt lxml/trunk/doc/parsing.txt lxml/trunk/doc/tutorial.txt lxml/trunk/selftest.py lxml/trunk/selftest2.py lxml/trunk/src/lxml/serializer.pxi lxml/trunk/src/lxml/tests/test_elementtree.py lxml/trunk/src/lxml/tests/test_etree.py lxml/trunk/src/lxml/tests/test_io.py lxml/trunk/src/lxml/tests/test_sax.py lxml/trunk/src/lxml/tests/test_xslt.py Log: r3089 at delle: sbehnel | 2007-12-09 18:45:51 +0100 append newline at the end of serialised documents Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Fri Dec 14 08:42:53 2007 @@ -21,6 +21,10 @@ Other changes ------------- +* The serialisation of ElementTree objects now appends a newline at + the end of the document and also inserts newlines between the + top-level processing instructions and comments + * Second argument to ``lxml.etree.Extension()`` helper is no longer required, third argument is now a keyword-only argument ``ns``. Modified: lxml/trunk/doc/api.txt ============================================================================== --- lxml/trunk/doc/api.txt (original) +++ lxml/trunk/doc/api.txt Fri Dec 14 08:42:53 2007 @@ -112,6 +112,11 @@ d >>> print etree.tostring(tree) + + +ElementTree objects are serialised as complete documents, including +preceding or trailing processing instructions and comments. Mind also +the additional line break at the end. All operations that you run on such an ElementTree (like XPath, XSLT, etc.) will understand the explicitly chosen root as root node of a document. 
They Modified: lxml/trunk/doc/parsing.txt ============================================================================== --- lxml/trunk/doc/parsing.txt (original) +++ lxml/trunk/doc/parsing.txt Fri Dec 14 08:42:53 2007 @@ -512,9 +512,9 @@ >>> etree.tounicode(el) u'' - >>> et = etree.ElementTree(el) - >>> etree.tounicode(et) - u'' + >>> tree = etree.ElementTree(el) + >>> etree.tounicode(tree) + u'\n' The result of ``tounicode()`` can be treated like any other Python unicode string and then passed back into the parsers. However, if you want to save Modified: lxml/trunk/doc/tutorial.txt ============================================================================== --- lxml/trunk/doc/tutorial.txt (original) +++ lxml/trunk/doc/tutorial.txt Fri Dec 14 08:42:53 2007 @@ -396,12 +396,13 @@ The ElementTree class ===================== -An ``ElementTree`` is mainly a document wrapper around a tree with a root -node. It provides a couple of methods for parsing, serialisation and general -document handling. One of the bigger differences is that it serialises as a -complete document, as opposed to a single ``Element``. This includes top-level -processing instructions and comments, as well as a DOCTYPE and other DTD -content in the document:: +An ``ElementTree`` is mainly a document wrapper around a tree with a +root node. It provides a couple of methods for parsing, serialisation +and general document handling. One of the bigger differences is that +it serialises as a complete document, as opposed to a single +``Element``. 
This includes top-level processing instructions and +comments, an additional line break at the end, as well as a DOCTYPE +and other DTD content in the document:: >>> from StringIO import StringIO >>> tree = etree.parse(StringIO('''\ @@ -423,6 +424,7 @@ eggs + >>> # lxml 1.3.4 and later >>> print etree.tostring(etree.ElementTree(tree.getroot())) @@ -432,6 +434,7 @@ eggs + >>> # ElementTree and lxml <= 1.3.3 >>> print etree.tostring(tree.getroot()) @@ -492,6 +495,7 @@ >>> print etree.tostring(tree) data + Note that ``parse()`` returns an ElementTree object, not an Element object as the string parser functions:: @@ -551,10 +555,11 @@ ... except StopIteration: ... return "" - >>> root = etree.parse(DataSource()) + >>> tree = etree.parse(DataSource()) - >>> print etree.tostring(root) + >>> print etree.tostring(tree) + The second way is through a feed parser interface, given by the ``feed(data)`` and ``close()`` methods:: Modified: lxml/trunk/selftest.py ============================================================================== --- lxml/trunk/selftest.py (original) +++ lxml/trunk/selftest.py Fri Dec 14 08:42:53 2007 @@ -25,6 +25,8 @@ def fix_compatibility(xml_data): xml_data = re.sub('\s*xmlns:[a-z0-9]+="http://www.w3.org/2001/XInclude"', '', xml_data) xml_data = xml_data.replace(' />', '/>') + if xml_data[-1:] == '\n': + xml_data = xml_data[:-1] return xml_data def serialize(elem, **options): Modified: lxml/trunk/selftest2.py ============================================================================== --- lxml/trunk/selftest2.py (original) +++ lxml/trunk/selftest2.py Fri Dec 14 08:42:53 2007 @@ -22,7 +22,11 @@ tree.write(file, encoding=encoding) else: tree.write(file) - return file.getvalue().replace(' />', '/>') + result = file.getvalue() + result = result.replace(' />', '/>') + if result[-1:] == '\n': + result = result[:-1] + return result def summarize(elem): return elem.tag Modified: lxml/trunk/src/lxml/serializer.pxi 
============================================================================== --- lxml/trunk/src/lxml/serializer.pxi (original) +++ lxml/trunk/src/lxml/serializer.pxi Fri Dec 14 08:42:53 2007 @@ -172,6 +172,7 @@ _writeTail(c_buffer, c_node, encoding, pretty_print) if write_complete_document: _writeNextSiblings(c_buffer, c_node, encoding, pretty_print) + tree.xmlOutputBufferWriteString(c_buffer, "\n") cdef void _writeDeclarationToBuffer(tree.xmlOutputBuffer* c_buffer, char* version, char* encoding) nogil: Modified: lxml/trunk/src/lxml/tests/test_elementtree.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_elementtree.py (original) +++ lxml/trunk/src/lxml/tests/test_elementtree.py Fri Dec 14 08:42:53 2007 @@ -734,7 +734,7 @@ tree = ElementTree(element=html) f = StringIO() tree.write(f, method="html") - data = f.getvalue() + data = f.getvalue().replace('\n','') self.assertEquals('

html
test

', data) @@ -2541,7 +2541,7 @@ tree = ElementTree(element=a) tree.write(f, encoding='utf-8') self.assertEquals(u'
S?k p? nettet'.encode('UTF-8'), - f.getvalue()) + f.getvalue().replace('\n','')) def test_parse_file_encoding(self): parse = self.etree.parse @@ -2574,9 +2574,7 @@ result = f.getvalue() declaration = "" self.assertEncodingDeclaration(result,'iso-8859-1') - result = result.split('?>', 1)[-1] - if result[0] == '\n': - result = result[1:] + result = result.split('?>', 1)[-1].replace('\n','') self.assertEquals(u'S?k p? nettet'.encode('iso-8859-1'), result) @@ -2629,7 +2627,7 @@ f = StringIO() tree = ElementTree(element=a) tree.write(f) - data = f.getvalue() + data = f.getvalue().replace('\n','') self.assertEquals( u'S?k p? nettet'.encode('ASCII', 'xmlcharrefreplace'), data) Modified: lxml/trunk/src/lxml/tests/test_etree.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_etree.py (original) +++ lxml/trunk/src/lxml/tests/test_etree.py Fri Dec 14 08:42:53 2007 @@ -268,23 +268,23 @@ f.close() def test_parse_remove_comments(self): - parse = self.etree.parse + fromstring = self.etree.fromstring tostring = self.etree.tostring XMLParser = self.etree.XMLParser - f = StringIO('') + xml = '' parser = XMLParser(remove_comments=True) - tree = parse(f, parser) + root = fromstring(xml, parser) self.assertEquals( '', - tostring(tree)) + tostring(root)) def test_parse_remove_pis(self): parse = self.etree.parse tostring = self.etree.tostring XMLParser = self.etree.XMLParser - xml = '\n\n' + xml = '\n\n\n' f = StringIO(xml) tree = parse(f) @@ -292,11 +292,10 @@ xml, tostring(tree)) - f = StringIO(xml) parser = XMLParser(remove_pis=True) tree = parse(f, parser) self.assertEquals( - '', + '\n', tostring(tree)) def test_parse_parser_type_error(self): @@ -1325,13 +1324,13 @@ def test_namespaces_reuse_after_move(self): ns_href = "http://a.b.c" - one = self.etree.parse( - StringIO('' % ns_href)) - baz = one.getroot()[0][0] - - two = self.etree.parse( - StringIO('' % ns_href)) - two.getroot().append(baz) + one = 
self.etree.fromstring( + '' % ns_href) + baz = one[0][0] + + two = self.etree.fromstring( + '' % ns_href) + two.append(baz) del one # make sure the source document is deallocated self.assertEquals('{%s}baz' % ns_href, baz.tag) @@ -1811,7 +1810,7 @@ self.assertEquals(docinfo.system_url, None) self.assertEquals(docinfo.root_name, 'html') self.assertEquals(docinfo.doctype, '') - + def test_dtd_io(self): # check that DTDs that go in also go back out xml = '''\ @@ -1820,10 +1819,10 @@ ]> - test-test\ + test-test ''' - root = self.etree.parse(StringIO(xml)) - self.assertEqual(self.etree.tostring(root).replace(" ", ""), + tree = self.etree.parse(StringIO(xml)) + self.assertEqual(self.etree.tostring(tree).replace(" ", ""), xml.replace(" ", "")) def test_byte_zero(self): Modified: lxml/trunk/src/lxml/tests/test_io.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_io.py (original) +++ lxml/trunk/src/lxml/tests/test_io.py Fri Dec 14 08:42:53 2007 @@ -70,7 +70,8 @@ handle, filename = tempfile.mkstemp(suffix=".xml") self.tree.write(filename) try: - self.assertEqual(open(filename).read(), self.root_str) + self.assertEqual(open(filename).read().replace('\n', ''), + self.root_str) finally: os.close(handle) os.remove(filename) Modified: lxml/trunk/src/lxml/tests/test_sax.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_sax.py (original) +++ lxml/trunk/src/lxml/tests/test_sax.py Fri Dec 14 08:42:53 2007 @@ -44,9 +44,9 @@ xml_out) def test_etree_sax_pi_root(self): - tree = self.parse('\nab') + tree = self.parse('ab') xml_out = self._saxify_serialize(tree) - self.assertEquals('\nab', + self.assertEquals('ab', xml_out) def test_etree_sax_attributes(self): @@ -211,7 +211,7 @@ new_tree = self._saxify_unsaxify(tree) f = StringIO() new_tree.write(f) - return f.getvalue() + return f.getvalue().replace('\n', '') def test_suite(): Modified: 
lxml/trunk/src/lxml/tests/test_xslt.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_xslt.py (original) +++ lxml/trunk/src/lxml/tests/test_xslt.py Fri Dec 14 08:42:53 2007 @@ -139,12 +139,12 @@ st = etree.XSLT(style) res = st.apply(tree) expected = u"""\ - +\ \uF8D2""" f = StringIO() res.write(f, encoding='UTF-16') - result = unicode(f.getvalue(), 'UTF-16') + result = unicode(f.getvalue(), 'UTF-16').replace('\n', '') self.assertEquals(expected, result) From scoder at codespeak.net Fri Dec 14 08:42:58 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 14 Dec 2007 08:42:58 +0100 (CET) Subject: [Lxml-checkins] r49763 - in lxml/trunk: . src/lxml Message-ID: <20071214074258.C0F2916851E@codespeak.net> Author: scoder Date: Fri Dec 14 08:42:58 2007 New Revision: 49763 Modified: lxml/trunk/ (props changed) lxml/trunk/src/lxml/lxml.etree.pyx Log: r3090 at delle: sbehnel | 2007-12-10 00:12:12 +0100 fix for parser memory leak after switching to Cython 0.9.6.8 Modified: lxml/trunk/src/lxml/lxml.etree.pyx ============================================================================== --- lxml/trunk/src/lxml/lxml.etree.pyx (original) +++ lxml/trunk/src/lxml/lxml.etree.pyx Fri Dec 14 08:42:58 2007 @@ -2276,7 +2276,7 @@ cdef _Document doc try: doc = _parseDocument(source, parser) - return ElementTree(doc.getroot()) + return _elementTreeFactory(doc, None) except _TargetParserResult, result_container: return result_container.result From scoder at codespeak.net Fri Dec 14 08:43:02 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 14 Dec 2007 08:43:02 +0100 (CET) Subject: [Lxml-checkins] r49764 - in lxml/trunk: . 
src/lxml Message-ID: <20071214074302.8C5CB168521@codespeak.net> Author: scoder Date: Fri Dec 14 08:43:02 2007 New Revision: 49764 Modified: lxml/trunk/ (props changed) lxml/trunk/src/lxml/lxml.etree.pyx Log: r3091 at delle: sbehnel | 2007-12-10 00:12:34 +0100 dropped redundant code Modified: lxml/trunk/src/lxml/lxml.etree.pyx ============================================================================== --- lxml/trunk/src/lxml/lxml.etree.pyx (original) +++ lxml/trunk/src/lxml/lxml.etree.pyx Fri Dec 14 08:43:02 2007 @@ -2172,8 +2172,6 @@ (DTD, XInclude, ...). """ cdef _Document doc - if parser is None: - parser = __GLOBAL_PARSER_CONTEXT.getDefaultParser() try: doc = _parseMemoryDocument(text, base_url, parser) return doc.getroot() From scoder at codespeak.net Fri Dec 14 08:43:05 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 14 Dec 2007 08:43:05 +0100 (CET) Subject: [Lxml-checkins] r49765 - in lxml/trunk: . src/lxml Message-ID: <20071214074305.DF526168521@codespeak.net> Author: scoder Date: Fri Dec 14 08:43:05 2007 New Revision: 49765 Modified: lxml/trunk/ (props changed) lxml/trunk/src/lxml/serializer.pxi Log: r3092 at delle: sbehnel | 2007-12-12 20:24:55 +0100 only append newlines when pretty printing Modified: lxml/trunk/src/lxml/serializer.pxi ============================================================================== --- lxml/trunk/src/lxml/serializer.pxi (original) +++ lxml/trunk/src/lxml/serializer.pxi Fri Dec 14 08:43:05 2007 @@ -172,7 +172,8 @@ _writeTail(c_buffer, c_node, encoding, pretty_print) if write_complete_document: _writeNextSiblings(c_buffer, c_node, encoding, pretty_print) - tree.xmlOutputBufferWriteString(c_buffer, "\n") + if pretty_print: + tree.xmlOutputBufferWriteString(c_buffer, "\n") cdef void _writeDeclarationToBuffer(tree.xmlOutputBuffer* c_buffer, char* version, char* encoding) nogil: @@ -243,7 +244,8 @@ while c_sibling != c_node: tree.xmlNodeDumpOutput(c_buffer, c_node.doc, c_sibling, 0, pretty_print, 
encoding) - tree.xmlOutputBufferWriteString(c_buffer, "\n") + if pretty_print: + tree.xmlOutputBufferWriteString(c_buffer, "\n") c_sibling = c_sibling.next cdef void _writeNextSiblings(tree.xmlOutputBuffer* c_buffer, xmlNode* c_node, @@ -256,7 +258,8 @@ while c_sibling != NULL and \ (c_sibling.type == tree.XML_PI_NODE or \ c_sibling.type == tree.XML_COMMENT_NODE): - tree.xmlOutputBufferWriteString(c_buffer, "\n") + if pretty_print: + tree.xmlOutputBufferWriteString(c_buffer, "\n") tree.xmlNodeDumpOutput(c_buffer, c_node.doc, c_sibling, 0, pretty_print, encoding) c_sibling = c_sibling.next @@ -407,5 +410,7 @@ c_buffer = tree.xmlOutputBufferCreateFile(python.PyFile_AsFile(f), NULL) tree.xmlNodeDumpOutput(c_buffer, c_node.doc, c_node, 0, pretty_print, NULL) _writeTail(c_buffer, c_node, NULL, 0) - tree.xmlOutputBufferWriteString(c_buffer, '\n') + if not pretty_print: + # not written yet + tree.xmlOutputBufferWriteString(c_buffer, '\n') tree.xmlOutputBufferFlush(c_buffer) From scoder at codespeak.net Fri Dec 14 08:47:10 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 14 Dec 2007 08:47:10 +0100 (CET) Subject: [Lxml-checkins] r49766 - lxml/trunk Message-ID: <20071214074710.41A611684E6@codespeak.net> Author: scoder Date: Fri Dec 14 08:47:09 2007 New Revision: 49766 Modified: lxml/trunk/ (props changed) lxml/trunk/CHANGES.txt Log: r3099 at delle: sbehnel | 2007-12-14 08:47:04 +0100 changelog Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Fri Dec 14 08:47:09 2007 @@ -14,6 +14,8 @@ Bugs fixed ---------- +* Memory leak in the ``parse()`` function. + * Minor bugs in XSLT error message formatting. * Result document memory leak in target parser. 
@@ -21,9 +23,9 @@ Other changes ------------- -* The serialisation of ElementTree objects now appends a newline at - the end of the document and also inserts newlines between the - top-level processing instructions and comments +* The 'pretty printed' serialisation of ElementTree objects now + appends a newline at the end of the document and also inserts + newlines between the top-level processing instructions and comments * Second argument to ``lxml.etree.Extension()`` helper is no longer required, third argument is now a keyword-only argument ``ns``. From ianb at codespeak.net Tue Dec 18 22:04:53 2007 From: ianb at codespeak.net (ianb at codespeak.net) Date: Tue, 18 Dec 2007 22:04:53 +0100 (CET) Subject: [Lxml-checkins] r49903 - in lxml/trunk: . src/lxml/html Message-ID: <20071218210453.933281684F1@codespeak.net> Author: ianb Date: Tue Dec 18 22:04:53 2007 New Revision: 49903 Modified: lxml/trunk/CHANGES.txt lxml/trunk/src/lxml/html/__init__.py Log: Added encoding argument to lxml.html.tostring Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Tue Dec 18 22:04:53 2007 @@ -30,6 +30,7 @@ * Second argument to ``lxml.etree.Extension()`` helper is no longer required, third argument is now a keyword-only argument ``ns``. +* ``lxml.html.tostring`` takes an ``encoding`` argument. 
2.0alpha5 (2007-11-24) ====================== Modified: lxml/trunk/src/lxml/html/__init__.py ============================================================================== --- lxml/trunk/src/lxml/html/__init__.py (original) +++ lxml/trunk/src/lxml/html/__init__.py Tue Dec 18 22:04:53 2007 @@ -1259,15 +1259,18 @@ __replace_meta_content_type = re.compile( r'').sub -def tostring(doc, pretty_print=False, include_meta_content_type=False): +def tostring(doc, pretty_print=False, include_meta_content_type=False, + encoding=None): """ - return HTML string representation of the document given + return HTML string representation of the document given - note: this will create a meta http-equiv="Content" tag in the head - and may replace any that are present + note: if include_meta_content_type is true this will create a meta + http-equiv="Content" tag in the head; regardless of the value of include_meta_content_type + any existing meta http-equiv="Content" tag will be removed """ assert doc is not None - html = etree.tostring(doc, method="html", pretty_print=pretty_print) + html = etree.tostring(doc, method="html", pretty_print=pretty_print, + encoding=encoding) if not include_meta_content_type: html = __replace_meta_content_type('', html) return html From scoder at codespeak.net Tue Dec 18 22:28:39 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Tue, 18 Dec 2007 22:28:39 +0100 (CET) Subject: [Lxml-checkins] r49904 - in lxml/trunk: . 
doc Message-ID: <20071218212839.E4021168443@codespeak.net> Author: scoder Date: Tue Dec 18 22:28:37 2007 New Revision: 49904 Modified: lxml/trunk/ (props changed) lxml/trunk/doc/validation.txt Log: r3101 at delle: sbehnel | 2007-12-18 09:24:09 +0100 small doc update Modified: lxml/trunk/doc/validation.txt ============================================================================== --- lxml/trunk/doc/validation.txt (original) +++ lxml/trunk/doc/validation.txt Tue Dec 18 22:28:37 2007 @@ -232,10 +232,10 @@ Schematron ---------- -Since version 2.0, lxml.etree features Schematron_ support, using the class -lxml.etree.Schematron. It requires libxml2 2.6.21. The API is the same as -for the other validators. Pass an ElementTree object to construct a -Schematron validator:: +Since version 2.0, lxml.etree features Schematron_ support, using the +class lxml.etree.Schematron. It requires at least libxml2 2.6.21 to +work. The API is the same as for the other validators. Pass an +ElementTree object to construct a Schematron validator:: >>> f = StringIO('''\ ... From scoder at codespeak.net Tue Dec 18 22:28:46 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Tue, 18 Dec 2007 22:28:46 +0100 (CET) Subject: [Lxml-checkins] r49905 - in lxml/trunk: . 
doc src/lxml/tests Message-ID: <20071218212846.51E35168448@codespeak.net> Author: scoder Date: Tue Dec 18 22:28:45 2007 New Revision: 49905 Modified: lxml/trunk/ (props changed) lxml/trunk/doc/api.txt lxml/trunk/doc/parsing.txt lxml/trunk/doc/tutorial.txt lxml/trunk/src/lxml/tests/test_etree.py Log: r3102 at delle: sbehnel | 2007-12-18 10:09:44 +0100 fixed test cases after newline-after-document serialisation change Modified: lxml/trunk/doc/api.txt ============================================================================== --- lxml/trunk/doc/api.txt (original) +++ lxml/trunk/doc/api.txt Tue Dec 18 22:28:45 2007 @@ -112,11 +112,9 @@ d >>> print etree.tostring(tree) - ElementTree objects are serialised as complete documents, including -preceding or trailing processing instructions and comments. Mind also -the additional line break at the end. +preceding or trailing processing instructions and comments. All operations that you run on such an ElementTree (like XPath, XSLT, etc.) will understand the explicitly chosen root as root node of a document. They Modified: lxml/trunk/doc/parsing.txt ============================================================================== --- lxml/trunk/doc/parsing.txt (original) +++ lxml/trunk/doc/parsing.txt Tue Dec 18 22:28:45 2007 @@ -514,7 +514,7 @@ >>> tree = etree.ElementTree(el) >>> etree.tounicode(tree) - u'\n' + u'' The result of ``tounicode()`` can be treated like any other Python unicode string and then passed back into the parsers. However, if you want to save Modified: lxml/trunk/doc/tutorial.txt ============================================================================== --- lxml/trunk/doc/tutorial.txt (original) +++ lxml/trunk/doc/tutorial.txt Tue Dec 18 22:28:45 2007 @@ -401,8 +401,7 @@ and general document handling. One of the bigger differences is that it serialises as a complete document, as opposed to a single ``Element``. 
This includes top-level processing instructions and -comments, an additional line break at the end, as well as a DOCTYPE -and other DTD content in the document:: +comments, as well as a DOCTYPE and other DTD content in the document:: >>> from StringIO import StringIO >>> tree = etree.parse(StringIO('''\ @@ -424,7 +423,6 @@ eggs - >>> # lxml 1.3.4 and later >>> print etree.tostring(etree.ElementTree(tree.getroot())) @@ -434,7 +432,6 @@ eggs - >>> # ElementTree and lxml <= 1.3.3 >>> print etree.tostring(tree.getroot()) @@ -495,7 +492,6 @@ >>> print etree.tostring(tree) data - Note that ``parse()`` returns an ElementTree object, not an Element object as the string parser functions:: @@ -559,7 +555,6 @@ >>> print etree.tostring(tree) - The second way is through a feed parser interface, given by the ``feed(data)`` and ``close()`` methods:: Modified: lxml/trunk/src/lxml/tests/test_etree.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_etree.py (original) +++ lxml/trunk/src/lxml/tests/test_etree.py Tue Dec 18 22:28:45 2007 @@ -284,7 +284,7 @@ tostring = self.etree.tostring XMLParser = self.etree.XMLParser - xml = '\n\n\n' + xml = '' f = StringIO(xml) tree = parse(f) @@ -295,7 +295,7 @@ parser = XMLParser(remove_pis=True) tree = parse(f, parser) self.assertEquals( - '\n', + '', tostring(tree)) def test_parse_parser_type_error(self): @@ -1819,7 +1819,7 @@ ]> - test-test + test-test\ ''' tree = self.etree.parse(StringIO(xml)) self.assertEqual(self.etree.tostring(tree).replace(" ", ""), From scoder at codespeak.net Tue Dec 18 22:28:51 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Tue, 18 Dec 2007 22:28:51 +0100 (CET) Subject: [Lxml-checkins] r49906 - in lxml/trunk: . 
src/lxml Message-ID: <20071218212851.1664E168464@codespeak.net> Author: scoder Date: Tue Dec 18 22:28:50 2007 New Revision: 49906 Modified: lxml/trunk/ (props changed) lxml/trunk/CHANGES.txt lxml/trunk/src/lxml/lxml.etree.pyx Log: r3103 at delle: sbehnel | 2007-12-18 10:11:28 +0100 reverted getiterator() behaviour to returning a real iterator, method is now officially deprecated Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Tue Dec 18 22:28:50 2007 @@ -23,6 +23,13 @@ Other changes ------------- +* The ``getiterator()`` method on Elements and ElementTrees was + reverted to return an iterator as it did in lxml 1.x. The ET API + specification allows it to return either a sequence or an iterator, + and it traditionally returned a sequence in ET and an iterator in + lxml. However, it is now deprecated in favour of the ``iter()`` + method, which should be used in new code wherever possible. + * The 'pretty printed' serialisation of ElementTree objects now appends a newline at the end of the document and also inserts newlines between the top-level processing instructions and comments Modified: lxml/trunk/src/lxml/lxml.etree.pyx ============================================================================== --- lxml/trunk/src/lxml/lxml.etree.pyx (original) +++ lxml/trunk/src/lxml/lxml.etree.pyx Tue Dec 18 22:28:50 2007 @@ -1099,20 +1099,27 @@ return _elementTreeFactory(self._doc, None) def getiterator(self, tag=None): - """Returns a sequence of all elements in the subtree in document order - (depth first pre-order), starting with this element. + """Returns a sequence or iterator of all elements in the subtree in + document order (depth first pre-order), starting with this + element. - Can be restricted to find only elements with a specific tag or from a - namespace. 
+ Can be restricted to find only elements with a specific tag + (pass ``tag="xyz"``) or from a namespace (pass ``tag="{ns}*"``). You can also pass the Element, Comment, ProcessingInstruction and Entity factory functions to look only for the specific element type. - Note that this method previously returned an iterator, which diverged - from the original ElementTree behaviour. If you want an efficient - iterator, use the ``el.iter()`` method instead. + Note that this method is deprecated as of ElementTree 1.3 and + lxml 2.0. It returns an iterator in lxml, which diverges from + the original ElementTree behaviour. If you want an efficient + iterator, use the ``element.iter()`` method instead. You + should only use this method in new code if you require + backwards compatibility with older versions of lxml or + ElementTree. + + @deprecated """ - return list(ElementDepthFirstIterator(self, tag)) + return ElementDepthFirstIterator(self, tag) def iter(self, tag=None): """Iterate over all elements in the subtree in document order (depth @@ -1456,17 +1463,29 @@ return path def getiterator(self, tag=None): - """Creates an iterator for the root element. The iterator loops over all elements - in this tree, in document order. + """Returns a sequence or iterator of all elements in document order + (depth first pre-order), starting with the root element. + + Can be restricted to find only elements with a specific tag + (pass ``tag="xyz"`` or ``tag="{ns}xyz"``) or from a namespace + (pass ``tag="{ns}*"``). - Note that this method is deprecated in favour of the ``el.iter()`` - method. In new code, use it only if you require backwards - compatibility. + You can also pass the Element, Comment, ProcessingInstruction and + Entity factory functions to look only for the specific element type. + + Note that this method is deprecated as of ElementTree 1.3 and + lxml 2.0. It returns an iterator in lxml, which diverges from + the original ElementTree behaviour. 
If you want an efficient + iterator, use the ``tree.iter()`` method instead. You should + only use this method in new code if you require backwards + compatibility with older versions of lxml or ElementTree. + + @deprecated """ root = self.getroot() if root is None: return () - return root.iter(tag) + return root.getiterator(tag) def iter(self, tag=None): """Creates an iterator for the root element. The iterator loops over @@ -1479,7 +1498,7 @@ def find(self, path): """Finds the first toplevel element with given tag. Same as - getroot().find(path). + ``tree.getroot().find(path)``. """ self._assertHasRoot() root = self.getroot() From scoder at codespeak.net Tue Dec 18 22:28:56 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Tue, 18 Dec 2007 22:28:56 +0100 (CET) Subject: [Lxml-checkins] r49907 - in lxml/trunk: . doc Message-ID: <20071218212856.39E651684EF@codespeak.net> Author: scoder Date: Tue Dec 18 22:28:55 2007 New Revision: 49907 Modified: lxml/trunk/ (props changed) lxml/trunk/CHANGES.txt lxml/trunk/doc/main.txt lxml/trunk/version.txt Log: r3104 at delle: sbehnel | 2007-12-18 10:16:48 +0100 prepare release of lxml 2.0beta1 Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Tue Dec 18 22:28:55 2007 @@ -2,8 +2,8 @@ lxml changelog ============== -Under development -================= +2.0beta1 (2007-12-18) +===================== Features added -------------- Modified: lxml/trunk/doc/main.txt ============================================================================== --- lxml/trunk/doc/main.txt (original) +++ lxml/trunk/doc/main.txt Tue Dec 18 22:28:55 2007 @@ -138,8 +138,8 @@ .. _`lxml at the Python Package Index`: http://pypi.python.org/pypi/lxml/ .. _`this key`: pubkey.asc -The latest version is `lxml 2.0alpha5`_, released 2007-11-24 -(`changes for 2.0alpha5`_). `Older versions`_ are listed below. 
+The latest version is `lxml 2.0beta1`_, released 2007-12-18 +(`changes for 2.0beta1`_). `Older versions`_ are listed below. .. _`Older versions`: #old-versions @@ -199,6 +199,8 @@ Old Versions ------------ +* `lxml 2.0alpha5`_, released 2007-11-24 (`changes for 2.0alpha5`_) + * `lxml 2.0alpha4`_, released 2007-10-07 (`changes for 2.0alpha4`_) * `lxml 2.0alpha3`_, released 2007-09-26 (`changes for 2.0alpha3`_) @@ -257,6 +259,7 @@ * `lxml 0.5`_, released 2005-04-08 +.. _`lxml 2.0beta1`: lxml-2.0beta1.tgz .. _`lxml 2.0alpha5`: lxml-2.0alpha5.tgz .. _`lxml 2.0alpha4`: lxml-2.0alpha4.tgz .. _`lxml 2.0alpha3`: lxml-2.0alpha3.tgz @@ -287,6 +290,7 @@ .. _`lxml 0.5.1`: lxml-0.5.1.tgz .. _`lxml 0.5`: lxml-0.5.tgz +.. _`changes for 2.0beta1`: changes-2.0beta1.html .. _`changes for 2.0alpha5`: changes-2.0alpha5.html .. _`changes for 2.0alpha4`: changes-2.0alpha4.html .. _`changes for 2.0alpha3`: changes-2.0alpha3.html Modified: lxml/trunk/version.txt ============================================================================== --- lxml/trunk/version.txt (original) +++ lxml/trunk/version.txt Tue Dec 18 22:28:55 2007 @@ -1 +1 @@ -2.0alpha5 +2.0beta1 From scoder at codespeak.net Tue Dec 18 22:28:59 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Tue, 18 Dec 2007 22:28:59 +0100 (CET) Subject: [Lxml-checkins] r49908 - in lxml/trunk: . 
src/lxml Message-ID: <20071218212859.131661684F2@codespeak.net> Author: scoder Date: Tue Dec 18 22:28:59 2007 New Revision: 49908 Modified: lxml/trunk/ (props changed) lxml/trunk/src/lxml/lxml.etree.pyx Log: r3105 at delle: sbehnel | 2007-12-18 11:17:22 +0100 mark getchildren() as deprecated Modified: lxml/trunk/src/lxml/lxml.etree.pyx ============================================================================== --- lxml/trunk/src/lxml/lxml.etree.pyx (original) +++ lxml/trunk/src/lxml/lxml.etree.pyx Tue Dec 18 22:28:59 2007 @@ -1109,15 +1109,13 @@ You can also pass the Element, Comment, ProcessingInstruction and Entity factory functions to look only for the specific element type. - Note that this method is deprecated as of ElementTree 1.3 and - lxml 2.0. It returns an iterator in lxml, which diverges from - the original ElementTree behaviour. If you want an efficient - iterator, use the ``element.iter()`` method instead. You - should only use this method in new code if you require - backwards compatibility with older versions of lxml or - ElementTree. - - @deprecated + @deprecated: Note that this method is deprecated as of + ElementTree 1.3 and lxml 2.0. It returns an iterator in lxml, + which diverges from the original ElementTree behaviour. If + you want an efficient iterator, use the ``element.iter()`` + method instead. You should only use this method in new code + if you require backwards compatibility with older versions of + lxml or ElementTree. """ return ElementDepthFirstIterator(self, tag) @@ -1473,14 +1471,13 @@ You can also pass the Element, Comment, ProcessingInstruction and Entity factory functions to look only for the specific element type. - Note that this method is deprecated as of ElementTree 1.3 and - lxml 2.0. It returns an iterator in lxml, which diverges from - the original ElementTree behaviour. If you want an efficient - iterator, use the ``tree.iter()`` method instead. 
You should - only use this method in new code if you require backwards - compatibility with older versions of lxml or ElementTree. - - @deprecated + @deprecated: Note that this method is deprecated as of + ElementTree 1.3 and lxml 2.0. It returns an iterator in lxml, + which diverges from the original ElementTree behaviour. If + you want an efficient iterator, use the ``tree.iter()`` method + instead. You should only use this method in new code if you + require backwards compatibility with older versions of lxml or + ElementTree. """ root = self.getroot() if root is None: From scoder at codespeak.net Tue Dec 18 22:29:03 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Tue, 18 Dec 2007 22:29:03 +0100 (CET) Subject: [Lxml-checkins] r49909 - in lxml/trunk: . doc Message-ID: <20071218212903.9389D1684EF@codespeak.net> Author: scoder Date: Tue Dec 18 22:29:03 2007 New Revision: 49909 Modified: lxml/trunk/ (props changed) lxml/trunk/doc/api.txt Log: r3106 at delle: sbehnel | 2007-12-18 18:52:41 +0100 cleanup in API docs Modified: lxml/trunk/doc/api.txt ============================================================================== --- lxml/trunk/doc/api.txt (original) +++ lxml/trunk/doc/api.txt Tue Dec 18 22:29:03 2007 @@ -141,45 +141,48 @@ The ElementTree API makes Elements iterable to supports iteration over their children. Using the tree defined above, we get:: - >>> [ el.tag for el in root ] + >>> [ child.tag for child in root ] ['a', 'b', 'c', 'd'] -Tree traversal is commonly based on the ``element.getiterator()`` method:: +To iterate in the opposite direction, use the ``reversed()`` function +that exists in Python 2.4 and later. 
- >>> [ el.tag for el in root.getiterator() ] +Tree traversal should use the ``element.iter()`` method:: + + >>> [ el.tag for el in root.iter() ] ['root', 'a', 'b', 'c', 'd', 'e'] lxml.etree also supports this, but additionally features an extended API for iteration over the children, following/preceding siblings, ancestors and descendants of an element, as defined by the respective XPath axis:: - >>> [ el.tag for el in root.iterchildren() ] + >>> [ child.tag for child in root.iterchildren() ] ['a', 'b', 'c', 'd'] - >>> [ el.tag for el in root.iterchildren(reversed=True) ] + >>> [ child.tag for child in root.iterchildren(reversed=True) ] ['d', 'c', 'b', 'a'] - >>> [ el.tag for el in b.itersiblings() ] + >>> [ sibling.tag for sibling in b.itersiblings() ] ['c', 'd'] - >>> [ el.tag for el in c.itersiblings(preceding=True) ] + >>> [ sibling.tag for sibling in c.itersiblings(preceding=True) ] ['b', 'a'] - >>> [ el.tag for el in e.iterancestors() ] + >>> [ ancestor.tag for ancestor in e.iterancestors() ] ['d', 'root'] >>> [ el.tag for el in root.iterdescendants() ] ['a', 'b', 'c', 'd', 'e'] -Note how ``element.iterdescendants()`` does not include the element itself, as -opposed to ``element.getiterator()``. The latter effectively implements the -'descendant-or-self' axis in XPath. +Note how ``element.iterdescendants()`` does not include the element +itself, as opposed to ``element.iter()``. The latter effectively +implements the 'descendant-or-self' axis in XPath. 
All of these iterators support an additional ``tag`` keyword argument that filters the generated elements by tag name:: - >>> [ el.tag for el in root.iterchildren(tag='a') ] + >>> [ child.tag for child in root.iterchildren(tag='a') ] ['a'] - >>> [ el.tag for el in d.iterchildren(tag='a') ] + >>> [ child.tag for child in d.iterchildren(tag='a') ] [] >>> [ el.tag for el in root.iterdescendants(tag='d') ] ['d'] - >>> [ el.tag for el in root.getiterator(tag='d') ] + >>> [ el.tag for el in root.iter(tag='d') ] ['d'] See also the section on the utility functions ``iterparse()`` and From scoder at codespeak.net Tue Dec 18 22:29:10 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Tue, 18 Dec 2007 22:29:10 +0100 (CET) Subject: [Lxml-checkins] r49910 - in lxml/trunk: . doc src/lxml src/lxml/tests Message-ID: <20071218212910.418DB168464@codespeak.net> Author: scoder Date: Tue Dec 18 22:29:09 2007 New Revision: 49910 Modified: lxml/trunk/ (props changed) lxml/trunk/CHANGES.txt lxml/trunk/doc/api.txt lxml/trunk/doc/elementsoup.txt lxml/trunk/doc/parsing.txt lxml/trunk/doc/tutorial.txt lxml/trunk/src/lxml/serializer.pxi lxml/trunk/src/lxml/tests/test_etree.py lxml/trunk/src/lxml/tests/test_htmlparser.py Log: r3107 at delle: sbehnel | 2007-12-18 19:01:53 +0100 always append a newline when pretty printing on serialisation (not only for ElementTrees) Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Tue Dec 18 22:29:09 2007 @@ -31,8 +31,10 @@ method, which should be used in new code wherever possible. * The 'pretty printed' serialisation of ElementTree objects now - appends a newline at the end of the document and also inserts - newlines between the top-level processing instructions and comments + inserts newlines at the root level between processing instructions, + comments and the root tag. 
+ +* A 'pretty printed' serialisation is now terminated with a newline. * Second argument to ``lxml.etree.Extension()`` helper is no longer required, third argument is now a keyword-only argument ``ns``. Modified: lxml/trunk/doc/api.txt ============================================================================== --- lxml/trunk/doc/api.txt (original) +++ lxml/trunk/doc/api.txt Tue Dec 18 22:29:09 2007 @@ -266,11 +266,14 @@ >>> print etree.tostring(root) - >>> print etree.tostring(root, pretty_print=True) + >>> print etree.tostring(root, pretty_print=True), +Note the newline that is appended at the end when pretty printing the +output. It was added in lxml 2.0. + By default, lxml (just as ElementTree) outputs the XML declaration only if it is required by the standard:: Modified: lxml/trunk/doc/elementsoup.txt ============================================================================== --- lxml/trunk/doc/elementsoup.txt (original) +++ lxml/trunk/doc/elementsoup.txt Tue Dec 18 22:29:09 2007 @@ -30,7 +30,7 @@ To see what we have here, you can serialise it:: >>> from lxml.etree import tostring - >>> print tostring(root, pretty_print=True) + >>> print tostring(root, pretty_print=True), Modified: lxml/trunk/doc/parsing.txt ============================================================================== --- lxml/trunk/doc/parsing.txt (original) +++ lxml/trunk/doc/parsing.txt Tue Dec 18 22:29:09 2007 @@ -139,7 +139,7 @@ >>> parser = etree.HTMLParser() >>> tree = etree.parse(StringIO(broken_html), parser) - >>> print etree.tostring(tree.getroot(), pretty_print=True) + >>> print etree.tostring(tree.getroot(), pretty_print=True), test @@ -153,7 +153,7 @@ ElementTree:: >>> html = etree.HTML(broken_html) - >>> print etree.tostring(html, pretty_print=True) + >>> print etree.tostring(html, pretty_print=True), test Modified: lxml/trunk/doc/tutorial.txt ============================================================================== --- lxml/trunk/doc/tutorial.txt (original) +++ 
lxml/trunk/doc/tutorial.txt Tue Dec 18 22:29:09 2007 @@ -104,7 +104,7 @@ To see that this is really XML, you can serialise the tree you have created:: - >>> print etree.tostring(root, pretty_print=True) + >>> print etree.tostring(root, pretty_print=True), @@ -306,7 +306,7 @@ >>> etree.SubElement(root, "child").text = "Child 2" >>> etree.SubElement(root, "another").text = "Child 3" - >>> print etree.tostring(root, pretty_print=True) + >>> print etree.tostring(root, pretty_print=True), Child 1 Child 2 @@ -337,10 +337,10 @@ Serialisation ------------- -Serialisation commonly uses with the ``tostring()`` function that -returns a string, or the ``ElementTree.write()`` method that writes to -a file or file-like object. Both accept the same keyword arguments -like ``pretty_print`` for formatted output or ``encoding`` to select a +Serialisation commonly uses the ``tostring()`` function that returns a +string, or the ``ElementTree.write()`` method that writes to a file or +file-like object. Both accept the same keyword arguments like +``pretty_print`` for formatted output or ``encoding`` to select a specific output encoding other than plain ASCII:: >>> root = etree.XML('') @@ -356,13 +356,16 @@ - >>> print etree.tostring(root, pretty_print=True) + >>> print etree.tostring(root, pretty_print=True), +Note the newline that is appended at the end when pretty printing the +output. + Since lxml 2.0 (and ElementTree 1.3), the serialisation functions can do more than XML serialisation. You can serialise to HTML or extract the text content by passing the ``method`` keyword:: @@ -378,7 +381,7 @@ >>> print etree.tostring(root, method='html')

Hello
World

- >>> print etree.tostring(root, method='html', pretty_print=True) + >>> print etree.tostring(root, method='html', pretty_print=True),

Hello
World

@@ -657,7 +660,7 @@ >>> body = etree.SubElement(xhtml, "{http://www.w3.org/1999/xhtml}body") >>> body.text = "Hello World" - >>> print etree.tostring(xhtml, pretty_print=True) + >>> print etree.tostring(xhtml, pretty_print=True), Hello World @@ -680,7 +683,7 @@ >>> body = etree.SubElement(xhtml, XHTML + "body") >>> body.text = "Hello World" - >>> print etree.tostring(xhtml, pretty_print=True) + >>> print etree.tostring(xhtml, pretty_print=True), Hello World @@ -689,7 +692,7 @@ >>> body.set(XHTML + "bgcolor", "#CCFFAA") - >>> print etree.tostring(xhtml, pretty_print=True) + >>> print etree.tostring(xhtml, pretty_print=True), Hello World @@ -736,7 +739,7 @@ ... ) ... ) - >>> print etree.tostring(page, pretty_print=True) + >>> print etree.tostring(page, pretty_print=True), This is a sample document @@ -777,7 +780,7 @@ ... ) ... ) - >>> print etree.tostring(my_doc, pretty_print=True) + >>> print etree.tostring(my_doc, pretty_print=True), The dog and the hog Modified: lxml/trunk/src/lxml/serializer.pxi ============================================================================== --- lxml/trunk/src/lxml/serializer.pxi (original) +++ lxml/trunk/src/lxml/serializer.pxi Tue Dec 18 22:29:09 2007 @@ -172,8 +172,8 @@ _writeTail(c_buffer, c_node, encoding, pretty_print) if write_complete_document: _writeNextSiblings(c_buffer, c_node, encoding, pretty_print) - if pretty_print: - tree.xmlOutputBufferWriteString(c_buffer, "\n") + if pretty_print: + tree.xmlOutputBufferWriteString(c_buffer, "\n") cdef void _writeDeclarationToBuffer(tree.xmlOutputBuffer* c_buffer, char* version, char* encoding) nogil: Modified: lxml/trunk/src/lxml/tests/test_etree.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_etree.py (original) +++ lxml/trunk/src/lxml/tests/test_etree.py Tue Dec 18 22:29:09 2007 @@ -1904,7 +1904,7 @@ self.assertEquals(result, "") result = tostring(a, pretty_print=True) - self.assertEquals(result, "\n \n \n") + 
self.assertEquals(result, "\n \n \n\n") def test_tostring_method_text_encoding(self): tostring = self.etree.tostring @@ -1989,7 +1989,7 @@ self.assertEquals(result, "") result = tounicode(a, pretty_print=True) - self.assertEquals(result, "\n \n \n") + self.assertEquals(result, "\n \n \n\n") def _writeElement(self, element, encoding='us-ascii'): """Write out element for comparison. Modified: lxml/trunk/src/lxml/tests/test_htmlparser.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_htmlparser.py (original) +++ lxml/trunk/src/lxml/tests/test_htmlparser.py Tue Dec 18 22:29:09 2007 @@ -20,7 +20,8 @@ test

page title

-""" + +""" broken_html_str = "test<body><h1>page title</h3></p></html>" uhtml_str = u"<html><head><title>test ??\uF8D2

page ??\uF8D2 title

" From scoder at codespeak.net Tue Dec 18 22:29:13 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Tue, 18 Dec 2007 22:29:13 +0100 (CET) Subject: [Lxml-checkins] r49911 - in lxml/trunk: . src/lxml Message-ID: <20071218212913.ED14A1684F0@codespeak.net> Author: scoder Date: Tue Dec 18 22:29:13 2007 New Revision: 49911 Modified: lxml/trunk/ (props changed) lxml/trunk/src/lxml/lxml.etree.pyx Log: r3108 at delle: sbehnel | 2007-12-18 20:38:06 +0100 deprecation of getchildren() Modified: lxml/trunk/src/lxml/lxml.etree.pyx ============================================================================== --- lxml/trunk/src/lxml/lxml.etree.pyx (original) +++ lxml/trunk/src/lxml/lxml.etree.pyx Tue Dec 18 22:29:13 2007 @@ -1020,8 +1020,9 @@ """Returns all direct children. The elements are returned in document order. - Note that this method has been deprecated as of ElementTree 1.3. New - code should use ``list(element)`` or simply iterate over elements. + @deprecated: Note that this method has been deprecated as of + ElementTree 1.3 and lxml 2.0. New code should use + ``list(element)`` or simply iterate over elements. """ return _collectChildren(self) From scoder at codespeak.net Tue Dec 18 22:29:17 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Tue, 18 Dec 2007 22:29:17 +0100 (CET) Subject: [Lxml-checkins] r49912 - in lxml/trunk: . 
src/lxml Message-ID: <20071218212917.0A4931684F2@codespeak.net> Author: scoder Date: Tue Dec 18 22:29:16 2007 New Revision: 49912 Modified: lxml/trunk/ (props changed) lxml/trunk/src/lxml/lxml.etree.pyx Log: r3109 at delle: sbehnel | 2007-12-18 20:57:43 +0100 made normal case in index() explicit Modified: lxml/trunk/src/lxml/lxml.etree.pyx ============================================================================== --- lxml/trunk/src/lxml/lxml.etree.pyx (original) +++ lxml/trunk/src/lxml/lxml.etree.pyx Tue Dec 18 22:29:16 2007 @@ -934,6 +934,17 @@ if c_child.parent is not self._c_node: raise ValueError, "Element is not a child of this node." + # handle the unbounded search straight away (normal case) + if stop is None and (start is None or start == 0): + k = 0 + c_child = c_child.prev + while c_child is not NULL: + if _isElement(c_child): + k = k + 1 + c_child = c_child.prev + return k + + # check indices if start is None: c_start = 0 else: From scoder at codespeak.net Tue Dec 18 22:29:21 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Tue, 18 Dec 2007 22:29:21 +0100 (CET) Subject: [Lxml-checkins] r49913 - in lxml/trunk: . src/lxml Message-ID: <20071218212921.71B3B16851D@codespeak.net> Author: scoder Date: Tue Dec 18 22:29:20 2007 New Revision: 49913 Modified: lxml/trunk/ (props changed) lxml/trunk/src/lxml/lxml.etree.pyx Log: r3110 at delle: sbehnel | 2007-12-18 21:05:10 +0100 docstring update Modified: lxml/trunk/src/lxml/lxml.etree.pyx ============================================================================== --- lxml/trunk/src/lxml/lxml.etree.pyx (original) +++ lxml/trunk/src/lxml/lxml.etree.pyx Tue Dec 18 22:29:20 2007 @@ -1135,8 +1135,8 @@ """Iterate over all elements in the subtree in document order (depth first pre-order), starting with this element. - Can be restricted to find only elements with a specific tag or from a - namespace. 
+ Can be restricted to find only elements with a specific tag + (pass ``tag="xyz"``) or from a namespace (pass ``tag="{ns}*"``). You can also pass the Element, Comment, ProcessingInstruction and Entity factory functions to look only for the specific element type. From scoder at codespeak.net Tue Dec 18 22:29:24 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Tue, 18 Dec 2007 22:29:24 +0100 (CET) Subject: [Lxml-checkins] r49914 - in lxml/trunk: . src/lxml/tests Message-ID: <20071218212924.87781168559@codespeak.net> Author: scoder Date: Tue Dec 18 22:29:24 2007 New Revision: 49914 Modified: lxml/trunk/ (props changed) lxml/trunk/src/lxml/tests/test_elementtree.py Log: r3111 at delle: sbehnel | 2007-12-18 21:06:09 +0100 test case cleanup Modified: lxml/trunk/src/lxml/tests/test_elementtree.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_elementtree.py (original) +++ lxml/trunk/src/lxml/tests/test_elementtree.py Tue Dec 18 22:29:24 2007 @@ -19,6 +19,14 @@ getattr(cElementTree, "VERSION", "0.0").split(".")]) <= (1,0,6): cElementTree = None +try: + reversed +except NameError: + # Python 2.3 + def reversed(seq): + seq = list(seq)[::-1] + return seq + class ETreeTestCaseBase(unittest.TestCase): etree = None @@ -593,13 +601,6 @@ def test_iteration_reversed(self): XML = self.etree.XML - - try: - reversed(()) - except NameError: - # before Python 2.4 - return - root = XML('TwoHm') result = [] for el in reversed(root): @@ -1451,6 +1452,23 @@ '', b) + def test_iter(self): + Element = self.etree.Element + SubElement = self.etree.SubElement + + a = Element('a') + b = SubElement(a, 'b') + c = SubElement(a, 'c') + d = SubElement(b, 'd') + e = SubElement(c, 'e') + + self.assertEquals( + [a, b, d, c, e], + list(a.iter())) + self.assertEquals( + [d], + list(d.iter())) + def test_getiterator(self): Element = self.etree.Element SubElement = self.etree.SubElement From scoder at codespeak.net Wed Dec 
19 08:54:18 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Wed, 19 Dec 2007 08:54:18 +0100 (CET) Subject: [Lxml-checkins] r49916 - lxml/trunk Message-ID: <20071219075418.EC9481684CA@codespeak.net> Author: scoder Date: Wed Dec 19 08:54:17 2007 New Revision: 49916 Modified: lxml/trunk/ (props changed) lxml/trunk/CHANGES.txt Log: r3125 at delle: sbehnel | 2007-12-18 22:34:05 +0100 cleanup Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Wed Dec 19 08:54:17 2007 @@ -41,6 +41,7 @@ * ``lxml.html.tostring`` takes an ``encoding`` argument. + 2.0alpha5 (2007-11-24) ====================== From scoder at codespeak.net Wed Dec 19 08:54:22 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Wed, 19 Dec 2007 08:54:22 +0100 (CET) Subject: [Lxml-checkins] r49917 - in lxml/trunk: . src/lxml/tests Message-ID: <20071219075422.2FC411684E1@codespeak.net> Author: scoder Date: Wed Dec 19 08:54:21 2007 New Revision: 49917 Modified: lxml/trunk/ (props changed) lxml/trunk/src/lxml/tests/test_objectify.py Log: r3126 at delle: sbehnel | 2007-12-18 23:05:03 +0100 fix API usage Modified: lxml/trunk/src/lxml/tests/test_objectify.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_objectify.py (original) +++ lxml/trunk/src/lxml/tests/test_objectify.py Wed Dec 19 08:54:21 2007 @@ -1605,9 +1605,11 @@ root = XML(xml) objectify.annotate(root) - attribs = root.xpath("//@py:%s" % pytype_name, {"py" : pytype_ns}) + attribs = root.xpath("//@py:%s" % pytype_name, + namespaces={"py" : pytype_ns}) self.assertEquals(0, len(attribs)) - attribs = root.xpath("//@py:test", {"py" : "TEST"}) + attribs = root.xpath("//@py:test", + namespaces={"py" : "TEST"}) self.assertEquals(7, len(attribs)) objectify.setPytypeAttributeTag() @@ -1617,11 +1619,13 @@ self.assertNotEqual("test", 
pytype_name.lower()) root = XML(xml) - attribs = root.xpath("//@py:%s" % pytype_name, {"py" : pytype_ns}) + attribs = root.xpath("//@py:%s" % pytype_name, + namespaces={"py" : pytype_ns}) self.assertEquals(0, len(attribs)) objectify.annotate(root) - attribs = root.xpath("//@py:%s" % pytype_name, {"py" : pytype_ns}) + attribs = root.xpath("//@py:%s" % pytype_name, + namespaces={"py" : pytype_ns}) self.assertEquals(7, len(attribs)) def test_registered_types(self): From scoder at codespeak.net Wed Dec 19 08:54:25 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Wed, 19 Dec 2007 08:54:25 +0100 (CET) Subject: [Lxml-checkins] r49918 - in lxml/trunk: . doc Message-ID: <20071219075425.F2FF0168508@codespeak.net> Author: scoder Date: Wed Dec 19 08:54:25 2007 New Revision: 49918 Modified: lxml/trunk/ (props changed) lxml/trunk/doc/extensions.txt Log: r3127 at delle: sbehnel | 2007-12-18 23:05:17 +0100 fix API usage Modified: lxml/trunk/doc/extensions.txt ============================================================================== --- lxml/trunk/doc/extensions.txt (original) +++ lxml/trunk/doc/extensions.txt Wed Dec 19 08:54:25 2007 @@ -71,7 +71,7 @@ >>> ns = etree.FunctionNamespace('http://mydomain.org/myfunctions') >>> ns['hello'] = hello >>> prefixmap = {'f' : 'http://mydomain.org/myfunctions'} - >>> print root.xpath('f:hello(local-name(*))', prefixmap) + >>> print root.xpath('f:hello(local-name(*))', namespaces=prefixmap) Hello b @@ -324,7 +324,7 @@ >>> ns['new-node-set'] = returnsNodeSet - >>> e = etree.XPathEvaluator(doc, None) + >>> e = etree.XPathEvaluator(doc) >>> r = e.evaluate("new-node-set()/result") >>> print [ t.text for t in r ] From scoder at codespeak.net Wed Dec 19 08:54:29 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Wed, 19 Dec 2007 08:54:29 +0100 (CET) Subject: [Lxml-checkins] r49919 - in lxml/trunk: . 
doc Message-ID: <20071219075429.10A63168516@codespeak.net> Author: scoder Date: Wed Dec 19 08:54:29 2007 New Revision: 49919 Modified: lxml/trunk/ (props changed) lxml/trunk/doc/xpathxslt.txt Log: r3128 at delle: sbehnel | 2007-12-18 23:05:32 +0100 fix API usage Modified: lxml/trunk/doc/xpathxslt.txt ============================================================================== --- lxml/trunk/doc/xpathxslt.txt (original) +++ lxml/trunk/doc/xpathxslt.txt Wed Dec 19 08:54:29 2007 @@ -112,8 +112,9 @@ ... ''') >>> doc = etree.parse(f) - >>> r = doc.xpath('/t:foo/b:bar', {'t': 'http://codespeak.net/ns/test1', - ... 'b': 'http://codespeak.net/ns/test2'}) + >>> r = doc.xpath('/t:foo/b:bar', + ... namespaces={'t': 'http://codespeak.net/ns/test1', + ... 'b': 'http://codespeak.net/ns/test2'}) >>> len(r) 1 >>> r[0].tag @@ -195,7 +196,7 @@ >>> root = etree.XML("") - >>> find = etree.XPath("//n:b", {'n':'NS'}) + >>> find = etree.XPath("//n:b", namespaces={'n':'NS'}) >>> print find(root)[0].tag {NS}b @@ -203,7 +204,7 @@ >>> regexpNS = "http://exslt.org/regular-expressions" >>> find = etree.XPath("//*[re:test(., '^abc$', 'i')]", - ... {'re':regexpNS}) + ... namespaces={'re':regexpNS}) >>> root = etree.XML("aBaBc") >>> print find(root)[0].text @@ -257,7 +258,7 @@ >>> root = etree.XML("") - >>> find = etree.XPath("//p:b", {'p' : 'ns'}) + >>> find = etree.XPath("//p:b", namespaces={'p' : 'ns'}) >>> print find(root)[0].tag {ns}b From scoder at codespeak.net Wed Dec 19 08:54:33 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Wed, 19 Dec 2007 08:54:33 +0100 (CET) Subject: [Lxml-checkins] r49920 - in lxml/trunk: . 
src/lxml/tests Message-ID: <20071219075433.0B6571684CA@codespeak.net> Author: scoder Date: Wed Dec 19 08:54:33 2007 New Revision: 49920 Modified: lxml/trunk/ (props changed) lxml/trunk/src/lxml/tests/test_xpathevaluator.py Log: r3129 at delle: sbehnel | 2007-12-18 23:06:04 +0100 fix API usage Modified: lxml/trunk/src/lxml/tests/test_xpathevaluator.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_xpathevaluator.py (original) +++ lxml/trunk/src/lxml/tests/test_xpathevaluator.py Wed Dec 19 08:54:33 2007 @@ -97,27 +97,27 @@ root = tree.getroot() self.assertEquals( [root[0]], - tree.xpath('//foo:b', {'foo': 'uri:a'})) + tree.xpath('//foo:b', namespaces={'foo': 'uri:a'})) self.assertEquals( [], - tree.xpath('//foo:b', {'foo': 'uri:c'})) + tree.xpath('//foo:b', namespaces={'foo': 'uri:c'})) self.assertEquals( [root[0]], - root.xpath('//baz:b', {'baz': 'uri:a'})) + root.xpath('//baz:b', namespaces={'baz': 'uri:a'})) def test_xpath_ns_none(self): tree = self.parse('') root = tree.getroot() self.assertRaises( TypeError, - root.xpath, '//b', {None: 'uri:a'}) + root.xpath, '//b', namespaces={None: 'uri:a'}) def test_xpath_ns_empty(self): tree = self.parse('') root = tree.getroot() self.assertRaises( TypeError, - root.xpath, '//b', {'': 'uri:a'}) + root.xpath, '//b', namespaces={'': 'uri:a'}) def test_xpath_error(self): tree = self.parse('') @@ -195,7 +195,7 @@ return 'hello %s' % a extension = {(None, 'foo'): foo} tree = self.parse('') - e = etree.XPathEvaluator(tree, None, [extension]) + e = etree.XPathEvaluator(tree, extensions=[extension]) self.assertEquals( "hello you", e.evaluate("foo('you')")) @@ -212,7 +212,7 @@ return 1/0 extension = {(None, 'foo'): foo} tree = self.parse('') - e = etree.XPathEvaluator(tree, None, [extension]) + e = etree.XPathEvaluator(tree, extensions=[extension]) self.assertRaises(ZeroDivisionError, e.evaluate, "foo('test')") def test_xpath_extensions_nodes(self): @@ -225,7 +225,7 
@@ return r x = self.parse('') - e = etree.XPathEvaluator(x, None, [{(None, 'foo'): f}]) + e = etree.XPathEvaluator(x, extensions=[{(None, 'foo'): f}]) r = e.evaluate("foo('World')/result") self.assertEquals(2, len(r)) self.assertEquals('Hoi', r[0].text) @@ -241,7 +241,7 @@ return r x = self.parse('') - e = etree.XPathEvaluator(x, None, [{(None, 'foo'): f}]) + e = etree.XPathEvaluator(x, extensions=[{(None, 'foo'): f}]) r = e.evaluate("foo(/*)/result") self.assertEquals(2, len(r)) self.assertEquals('Hoi', r[0].text) @@ -258,7 +258,7 @@ return r x = self.parse('Honk') - e = etree.XPathEvaluator(x, None, [{(None, 'foo'): f}]) + e = etree.XPathEvaluator(x, extensions=[{(None, 'foo'): f}]) r = e.evaluate("foo(/*)/result") self.assertEquals(3, len(r)) self.assertEquals('Hoi', r[0].text) @@ -555,7 +555,7 @@ Test xpath extension functions. >>> root = SAMPLE_XML - >>> e = etree.XPathEvaluator(root, None, [extension]) + >>> e = etree.XPathEvaluator(root, extensions=[extension]) >>> e.evaluate("stringTest('you')") 'Hello you' >>> e.evaluate(u"stringTest('\xe9lan')") From scoder at codespeak.net Wed Dec 19 08:54:37 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Wed, 19 Dec 2007 08:54:37 +0100 (CET) Subject: [Lxml-checkins] r49921 - in lxml/trunk: . 
src/lxml Message-ID: <20071219075437.84DC616850A@codespeak.net> Author: scoder Date: Wed Dec 19 08:54:36 2007 New Revision: 49921 Modified: lxml/trunk/ (props changed) lxml/trunk/src/lxml/xslt.pxi Log: r3130 at delle: sbehnel | 2007-12-18 23:06:59 +0100 use keyword-only args in XSLT API Modified: lxml/trunk/src/lxml/xslt.pxi ============================================================================== --- lxml/trunk/src/lxml/xslt.pxi (original) +++ lxml/trunk/src/lxml/xslt.pxi Wed Dec 19 08:54:36 2007 @@ -179,7 +179,7 @@ * write_network """ cdef xslt.xsltSecurityPrefs* _prefs - def __init__(self, read_file=True, write_file=True, create_dir=True, + def __init__(self, *, read_file=True, write_file=True, create_dir=True, read_network=True, write_network=True): self._prefs = xslt.xsltNewSecurityPrefs() if self._prefs is NULL: @@ -269,7 +269,7 @@ cdef XSLTAccessControl _access_control cdef _ErrorLog _error_log - def __init__(self, xslt_input, extensions=None, regexp=True, + def __init__(self, xslt_input, *, extensions=None, regexp=True, access_control=None): cdef xslt.xsltStylesheet* c_style cdef xmlDoc* c_doc @@ -329,8 +329,8 @@ def __get__(self): return self._error_log.copy() - def apply(self, _input, profile_run=False, **_kw): - return self(_input, profile_run, **_kw) + def apply(self, _input, *, profile_run=False, **_kw): + return self(_input, profile_run=profile_run, **_kw) def tostring(self, _ElementTree result_tree): """Save result doc to string based on stylesheet output method. 
@@ -360,7 +360,7 @@ return new_xslt - def __call__(self, _input, profile_run=False, **_kw): + def __call__(self, _input, *, profile_run=False, **_kw): cdef _XSLTContext context cdef _XSLTResolverContext resolver_context cdef _Document input_doc @@ -595,7 +595,7 @@ if __findStylesheetByID is None: __findStylesheetByID = XPath( "//xsl:stylesheet[@xml:id = $id]", - {"xsl" : "http://www.w3.org/1999/XSL/Transform"}) + namespaces={"xsl" : "http://www.w3.org/1999/XSL/Transform"}) return __findStylesheetByID(doc, id=id) cdef class _XSLTProcessingInstruction(PIBase): From scoder at codespeak.net Wed Dec 19 08:54:40 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Wed, 19 Dec 2007 08:54:40 +0100 (CET) Subject: [Lxml-checkins] r49922 - lxml/trunk Message-ID: <20071219075440.C214B16850A@codespeak.net> Author: scoder Date: Wed Dec 19 08:54:40 2007 New Revision: 49922 Modified: lxml/trunk/ (props changed) lxml/trunk/CHANGES.txt Log: r3131 at delle: sbehnel | 2007-12-18 23:20:05 +0100 changelog Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Wed Dec 19 08:54:40 2007 @@ -23,6 +23,8 @@ Other changes ------------- +* The XSLT API now requires keyword-only arguments. + * The ``getiterator()`` method on Elements and ElementTrees was reverted to return an iterator as it did in lxml 1.x. The ET API specification allows it to return either a sequence or an iterator, From scoder at codespeak.net Wed Dec 19 08:54:44 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Wed, 19 Dec 2007 08:54:44 +0100 (CET) Subject: [Lxml-checkins] r49923 - in lxml/trunk: . 
doc Message-ID: <20071219075444.CF713168508@codespeak.net> Author: scoder Date: Wed Dec 19 08:54:44 2007 New Revision: 49923 Modified: lxml/trunk/ (props changed) lxml/trunk/CHANGES.txt lxml/trunk/doc/main.txt lxml/trunk/version.txt Log: r3132 at delle: sbehnel | 2007-12-19 08:54:03 +0100 another alpha (sigh) Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Wed Dec 19 08:54:44 2007 @@ -2,8 +2,8 @@ lxml changelog ============== -2.0beta1 (2007-12-18) -===================== +2.0alpha6 (2007-12-19) +====================== Features added -------------- @@ -23,7 +23,7 @@ Other changes ------------- -* The XSLT API now requires keyword-only arguments. +* Parts of the XSLT API now require keyword-only arguments. * The ``getiterator()`` method on Elements and ElementTrees was reverted to return an iterator as it did in lxml 1.x. The ET API Modified: lxml/trunk/doc/main.txt ============================================================================== --- lxml/trunk/doc/main.txt (original) +++ lxml/trunk/doc/main.txt Wed Dec 19 08:54:44 2007 @@ -138,8 +138,8 @@ .. _`lxml at the Python Package Index`: http://pypi.python.org/pypi/lxml/ .. _`this key`: pubkey.asc -The latest version is `lxml 2.0beta1`_, released 2007-12-18 -(`changes for 2.0beta1`_). `Older versions`_ are listed below. +The latest version is `lxml 2.0alpha6`_, released 2007-12-19 +(`changes for 2.0alpha6`_). `Older versions`_ are listed below. .. _`Older versions`: #old-versions @@ -259,7 +259,7 @@ * `lxml 0.5`_, released 2005-04-08 -.. _`lxml 2.0beta1`: lxml-2.0beta1.tgz +.. _`lxml 2.0alpha6`: lxml-2.0alpha6.tgz .. _`lxml 2.0alpha5`: lxml-2.0alpha5.tgz .. _`lxml 2.0alpha4`: lxml-2.0alpha4.tgz .. _`lxml 2.0alpha3`: lxml-2.0alpha3.tgz @@ -290,7 +290,7 @@ .. _`lxml 0.5.1`: lxml-0.5.1.tgz .. _`lxml 0.5`: lxml-0.5.tgz -.. _`changes for 2.0beta1`: changes-2.0beta1.html +.. 
_`changes for 2.0alpha6`: changes-2.0alpha6.html .. _`changes for 2.0alpha5`: changes-2.0alpha5.html .. _`changes for 2.0alpha4`: changes-2.0alpha4.html .. _`changes for 2.0alpha3`: changes-2.0alpha3.html Modified: lxml/trunk/version.txt ============================================================================== --- lxml/trunk/version.txt (original) +++ lxml/trunk/version.txt Wed Dec 19 08:54:44 2007 @@ -1 +1 @@ -2.0beta1 +2.0alpha6 From scoder at codespeak.net Wed Dec 19 12:02:57 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Wed, 19 Dec 2007 12:02:57 +0100 (CET) Subject: [Lxml-checkins] r49926 - in lxml/trunk: . src/lxml Message-ID: <20071219110257.3D59E168514@codespeak.net> Author: scoder Date: Wed Dec 19 12:02:56 2007 New Revision: 49926 Modified: lxml/trunk/ (props changed) lxml/trunk/src/lxml/lxml.etree.pyx lxml/trunk/src/lxml/xmlid.pxi Log: r3141 at delle: sbehnel | 2007-12-19 09:13:32 +0100 eliminated internal calls to ElementTree() Modified: lxml/trunk/src/lxml/lxml.etree.pyx ============================================================================== --- lxml/trunk/src/lxml/lxml.etree.pyx (original) +++ lxml/trunk/src/lxml/lxml.etree.pyx Wed Dec 19 12:02:56 2007 @@ -1400,13 +1400,13 @@ return self._context_node def __copy__(self): - return ElementTree(self._context_node) + return _elementTreeFactory(self._doc, self._context_node) def __deepcopy__(self, memo): - if self._context_node is None: - return ElementTree() - else: - return ElementTree( self._context_node.__copy__() ) + cdef _Element root + if self._context_node is not None: + root = self._context_node.__copy__() + return _elementTreeFactory(None, root) property docinfo: """Information about the document provided by parser and DTD. 
This Modified: lxml/trunk/src/lxml/xmlid.pxi ============================================================================== --- lxml/trunk/src/lxml/xmlid.pxi (original) +++ lxml/trunk/src/lxml/xmlid.pxi Wed Dec 19 12:02:56 2007 @@ -1,3 +1,5 @@ +cdef object _find_id_attributes + def XMLID(text): """Parse the text and return a tuple (root node, ID dictionary). The root node is the same as returned by the XML() function. The dictionary @@ -5,10 +7,14 @@ attributes. The elements referenced by the ID are stored as dictionary values. """ + global _find_id_attributes + if _find_id_attributes is None: + _find_id_attributes = XPath('//*[string(@id)]') + + # ElementTree compatible implementation: parse and look for 'id' attributes root = XML(text) - # ElementTree compatible implementation: look for 'id' attributes dic = {} - for elem in ElementTree(root).xpath('//*[string(@id)]'): + for elem in _find_id_attributes(root): python.PyDict_SetItem(dic, elem.get('id'), elem) return (root, dic) @@ -40,7 +46,7 @@ """ cdef _Document doc doc = _parseDocument(source, parser) - return (ElementTree(doc.getroot()), _IDDict(doc)) + return (_elementTreeFactory(doc, None), _IDDict(doc)) cdef class _IDDict: """A dictionary-like proxy class that mapps ID attributes to elements. From scoder at codespeak.net Wed Dec 19 12:13:14 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Wed, 19 Dec 2007 12:13:14 +0100 (CET) Subject: [Lxml-checkins] r49927 - in lxml/trunk: . 
src/lxml Message-ID: <20071219111314.C0BBB168518@codespeak.net> Author: scoder Date: Wed Dec 19 12:13:14 2007 New Revision: 49927 Modified: lxml/trunk/ (props changed) lxml/trunk/CHANGES.txt lxml/trunk/src/lxml/lxml.etree.pyx lxml/trunk/src/lxml/xpath.pxi Log: r3143 at delle: sbehnel | 2007-12-19 12:13:06 +0100 proposed keyword-only API for XPath and iteration methods Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Wed Dec 19 12:13:14 2007 @@ -23,7 +23,12 @@ Other changes ------------- -* Parts of the XSLT API now require keyword-only arguments. +* Various places in the XPath, XSLT and iteration APIs now require + keyword-only arguments. + +* The argument order in ``element.itersiblings()`` was changed to + match the order used in all other iteration methods. The second + argument ('preceding') is now a keyword-only argument. * The ``getiterator()`` method on Elements and ElementTrees was reverted to return an iterator as it did in lxml 1.x. The ET API Modified: lxml/trunk/src/lxml/lxml.etree.pyx ============================================================================== --- lxml/trunk/src/lxml/lxml.etree.pyx (original) +++ lxml/trunk/src/lxml/lxml.etree.pyx Wed Dec 19 12:13:14 2007 @@ -1065,7 +1065,7 @@ return _elementFactory(self._doc, c_node) return None - def itersiblings(self, preceding=False, tag=None): + def itersiblings(self, tag=None, *, preceding=False): """Iterate over the following or preceding siblings of this element. The direction is determined by the 'preceding' keyword which defaults @@ -1073,7 +1073,7 @@ generated elements can be restricted to a specific tag name with the 'tag' keyword. """ - return SiblingsIterator(self, preceding, tag) + return SiblingsIterator(self, tag, preceding=preceding) def iterancestors(self, tag=None): """Iterate over the ancestors of this element (from parent to parent). 
@@ -1090,16 +1090,16 @@ itself. The generated elements can be restricted to a specific tag name with the 'tag' keyword. """ - return ElementDepthFirstIterator(self, tag, False) + return ElementDepthFirstIterator(self, tag, inclusive=False) - def iterchildren(self, reversed=False, tag=None): + def iterchildren(self, tag=None, *, reversed=False): """Iterate over the children of this element. As opposed to using normal iteration on this element, the generated elements can be restricted to a specific tag name with the 'tag' keyword and reversed with the 'reversed' keyword. """ - return ElementChildIterator(self, reversed, tag) + return ElementChildIterator(self, tag, reversed=reversed) def getroottree(self): """Return an ElementTree for the root node of the document that @@ -1143,7 +1143,7 @@ """ return ElementDepthFirstIterator(self, tag) - def itertext(self, tag=None, with_tail=True): + def itertext(self, tag=None, *, with_tail=True): """Iterates over the text content of a subtree. You can pass the ``tag`` keyword argument to restrict text content to @@ -1152,7 +1152,7 @@ You can set the ``with_tail`` keyword argument to ``False`` to skip over tail text. """ - return ElementTextIterator(self, tag, with_tail) + return ElementTextIterator(self, tag, with_tail=with_tail) def makeelement(self, _tag, attrib=None, nsmap=None, **_extra): """Creates a new element associated with the same document. @@ -1188,10 +1188,11 @@ path = (path).text return _elementpath.iterfind(self, path) - def xpath(self, _path, namespaces=None, extensions=None, **_variables): + def xpath(self, _path, *, namespaces=None, extensions=None, **_variables): """Evaluate an xpath expression using the element as context node. """ - evaluator = XPathElementEvaluator(self, namespaces, extensions) + evaluator = XPathElementEvaluator(self, namespaces=namespaces, + extensions=extensions) return evaluator.evaluate(_path, **_variables) @@ -1545,7 +1546,7 @@ path = "." 
+ path return root.iterfind(path) - def xpath(self, _path, namespaces=None, extensions=None, **_variables): + def xpath(self, _path, *, namespaces=None, extensions=None, **_variables): """XPath evaluate in context of document. ``namespaces`` is an optional dictionary with prefix to namespace URI @@ -1562,7 +1563,8 @@ XPathEvaluator directly. """ self._assertHasRoot() - evaluator = XPathDocumentEvaluator(self, namespaces, extensions) + evaluator = XPathDocumentEvaluator(self, namespaces=namespaces, + extensions=extensions) return evaluator.evaluate(_path, **_variables) def xslt(self, _xslt, extensions=None, access_control=None, **_kw): @@ -1892,7 +1894,7 @@ cdef class ElementChildIterator(_ElementIterator): "Iterates over the children of an element." - def __init__(self, _Element node not None, reversed=False, tag=None): + def __init__(self, _Element node not None, tag=None, *, reversed=False): cdef xmlNode* c_node self._initTagMatch(tag) if reversed: @@ -1916,7 +1918,7 @@ You can pass the boolean keyword ``preceding`` to specify the direction. 
""" - def __init__(self, _Element node not None, preceding=False, tag=None): + def __init__(self, _Element node not None, tag=None, *, preceding=False): self._initTagMatch(tag) if preceding: self._next_element = _previousElement @@ -1951,7 +1953,7 @@ # keep next node to return and a depth counter in the tree cdef _Element _next_node cdef _Element _top_node - def __init__(self, _Element node not None, tag=None, inclusive=True): + def __init__(self, _Element node not None, tag=None, *, inclusive=True): self._top_node = node self._next_node = node self._initTagMatch(tag) @@ -2009,7 +2011,7 @@ """ cdef object _nextEvent cdef _Element _start_element - def __init__(self, _Element element not None, tag=None, with_tail=True): + def __init__(self, _Element element not None, tag=None, *, with_tail=True): if with_tail: events = ("start", "end") else: Modified: lxml/trunk/src/lxml/xpath.pxi ============================================================================== --- lxml/trunk/src/lxml/xpath.pxi (original) +++ lxml/trunk/src/lxml/xpath.pxi Wed Dec 19 12:13:14 2007 @@ -215,7 +215,7 @@ the 'regexp' boolean keyword (defaults to True). """ cdef _Element _element - def __init__(self, _Element element not None, namespaces=None, + def __init__(self, _Element element not None, *, namespaces=None, extensions=None, regexp=True): cdef xpath.xmlXPathContext* xpathCtxt cdef int ns_register_status @@ -280,10 +280,11 @@ keyword argument. EXSLT regular expression support can be disabled with the 'regexp' boolean keyword (defaults to True). """ - def __init__(self, _ElementTree etree not None, namespaces=None, + def __init__(self, _ElementTree etree not None, *, namespaces=None, extensions=None, regexp=True): XPathElementEvaluator.__init__( - self, etree._context_node, namespaces, extensions, regexp) + self, etree._context_node, namespaces=namespaces, + extensions=extensions, regexp=regexp) def __call__(self, _path, **_variables): """Evaluate an XPath expression on the document. 
@@ -322,7 +323,7 @@ return result -def XPathEvaluator(etree_or_element, namespaces=None, extensions=None, +def XPathEvaluator(etree_or_element, *, namespaces=None, extensions=None, regexp=True): """Creates an XPath evaluator for an ElementTree or an Element. @@ -334,11 +335,13 @@ the 'regexp' boolean keyword (defaults to True). """ if isinstance(etree_or_element, _ElementTree): - return XPathDocumentEvaluator(etree_or_element, namespaces, - extensions, regexp) + return XPathDocumentEvaluator( + etree_or_element, namespaces=namespaces, + extensions=extensions, regexp=regexp) else: - return XPathElementEvaluator(etree_or_element, namespaces, - extensions, regexp) + return XPathElementEvaluator( + etree_or_element, namespaces=namespaces, + extensions=extensions, regexp=regexp) cdef class XPath(_XPathEvaluatorBase): @@ -353,7 +356,7 @@ cdef xpath.xmlXPathCompExpr* _xpath cdef readonly object path - def __init__(self, path, namespaces=None, extensions=None, regexp=True): + def __init__(self, path, *, namespaces=None, extensions=None, regexp=True): cdef xpath.xmlXPathContext* xpathCtxt _XPathEvaluatorBase.__init__(self, namespaces, extensions, regexp) self.path = path @@ -415,9 +418,10 @@ Note that this class does not accept the ``namespace`` keyword argument. All namespaces must be passed as part of the path string. """ - def __init__(self, path, extensions=None, regexp=True): + def __init__(self, path, *, extensions=None, regexp=True): path, namespaces = self._nsextract_path(path) - XPath.__init__(self, path, namespaces, extensions, regexp) + XPath.__init__(self, path, namespaces=namespaces, + extensions=extensions, regexp=regexp) cdef _nsextract_path(self, path): # replace {namespaces} by new prefixes From scoder at codespeak.net Wed Dec 19 12:31:52 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Wed, 19 Dec 2007 12:31:52 +0100 (CET) Subject: [Lxml-checkins] r49928 - in lxml/trunk: . 
doc Message-ID: <20071219113152.9C4BA168514@codespeak.net> Author: scoder Date: Wed Dec 19 12:31:50 2007 New Revision: 49928 Modified: lxml/trunk/ (props changed) lxml/trunk/doc/build.txt lxml/trunk/setup.py Log: r3145 at delle: sbehnel | 2007-12-19 12:31:40 +0100 require Cython 0.9.6.10 Modified: lxml/trunk/doc/build.txt ============================================================================== --- lxml/trunk/doc/build.txt (original) +++ lxml/trunk/doc/build.txt Wed Dec 19 12:31:50 2007 @@ -33,11 +33,11 @@ be an lxml developer, you do need a working Cython installation. You can use EasyInstall_ to install it:: - easy_install Cython==0.9.6.8 + easy_install Cython==0.9.6.10 .. _EasyInstall: http://peak.telecommunity.com/DevCenter/EasyInstall -lxml currently requires at least Cython 0.9.6.8, but later versions +lxml currently requires at least Cython 0.9.6.10, but later versions should work. Modified: lxml/trunk/setup.py ============================================================================== --- lxml/trunk/setup.py (original) +++ lxml/trunk/setup.py Wed Dec 19 12:31:50 2007 @@ -16,7 +16,7 @@ except pkg_resources.VersionConflict: from ez_setup import use_setuptools use_setuptools(version="0.6c5") - #pkg_resources.require("Cython==0.9.6.6") + #pkg_resources.require("Cython==0.9.6.10") from setuptools import setup extra_options["zip_safe"] = False except ImportError: From scoder at codespeak.net Wed Dec 19 12:34:34 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Wed, 19 Dec 2007 12:34:34 +0100 (CET) Subject: [Lxml-checkins] r49929 - in lxml/trunk: . 
doc Message-ID: <20071219113434.697EE168514@codespeak.net> Author: scoder Date: Wed Dec 19 12:34:33 2007 New Revision: 49929 Modified: lxml/trunk/ (props changed) lxml/trunk/doc/mkhtml.py Log: r3147 at delle: sbehnel | 2007-12-19 12:34:29 +0100 fix API usage Modified: lxml/trunk/doc/mkhtml.py ============================================================================== --- lxml/trunk/doc/mkhtml.py (original) +++ lxml/trunk/doc/mkhtml.py Wed Dec 19 12:34:33 2007 @@ -20,13 +20,13 @@ ]) find_title = XPath("/h:html/h:head/h:title/text()", - {"h" : "http://www.w3.org/1999/xhtml"}) + namespaces={"h" : "http://www.w3.org/1999/xhtml"}) find_headings = XPath("//h:h1[not(@class)]/h:a/text()", - {"h" : "http://www.w3.org/1999/xhtml"}) + namespaces={"h" : "http://www.w3.org/1999/xhtml"}) find_menu = XPath("//h:ul[@id=$name]", - {"h" : "http://www.w3.org/1999/xhtml"}) + namespaces={"h" : "http://www.w3.org/1999/xhtml"}) find_page_end = XPath("/h:html/h:body/h:div[last()]", - {"h" : "http://www.w3.org/1999/xhtml"}) + namespaces={"h" : "http://www.w3.org/1999/xhtml"}) replace_invalid = re.compile(r'[-_/.\s\\]').sub From lxml-checkins at codespeak.net Wed Dec 19 17:38:57 2007 From: lxml-checkins at codespeak.net (VIAGRA ® Official Site) Date: Wed, 19 Dec 2007 17:38:57 +0100 (CET) Subject: [Lxml-checkins] December 76% OFF Message-ID: <20071219103911.7621.qmail@ppp85-141-227-102.pppoe.mtu-net.ru> An HTML attachment was scrubbed... URL: http://codespeak.net/pipermail/lxml-checkins/attachments/20071219/2000cd2b/attachment.htm From scoder at codespeak.net Thu Dec 20 11:35:04 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Thu, 20 Dec 2007 11:35:04 +0100 (CET) Subject: [Lxml-checkins] r49948 - in lxml/trunk: . 
doc Message-ID: <20071220103504.344DC169E2A@codespeak.net> Author: scoder Date: Thu Dec 20 11:35:02 2007 New Revision: 49948 Modified: lxml/trunk/ (props changed) lxml/trunk/doc/FAQ.txt Log: r3149 at delle: sbehnel | 2007-12-19 16:37:14 +0100 replaced Pyrex by Cython in docs Modified: lxml/trunk/doc/FAQ.txt ============================================================================== --- lxml/trunk/doc/FAQ.txt (original) +++ lxml/trunk/doc/FAQ.txt Thu Dec 20 11:35:02 2007 @@ -240,7 +240,7 @@ the eggs can only support the one they were compiled with. This means that you have to compile lxml from sources for your system. Note -that you do not need Pyrex for this, the lxml source distribution is directly +that you do not need Cython for this, the lxml source distribution is directly compilable on both platform types. See the `build instructions`_ on how to do this. @@ -257,9 +257,9 @@ the C-level is required for performance reasons. To avoid writing plain C-code and caring too much about the details of -built-in types and reference counting, lxml is written in Pyrex_, a +built-in types and reference counting, lxml is written in Cython_, a Python-like language that is translated into C-code. Chances are that if you -know Python, you can write `code that Pyrex accepts`_. Again, the C-ish style +know Python, you can write `code that Cython accepts`_. Again, the C-ish style used in the lxml code is just for performance optimisations. If you want to contribute, don't bother with the details, a Python implementation of your contribution is better than none. And keep in mind that lxml's flexible API @@ -269,8 +269,8 @@ Please contact the `mailing list`_ if you need any help. -.. _Pyrex: http://www.cosc.canterbury.ac.nz/greg.ewing/python/Pyrex/ -.. _`code that Pyrex accepts`: http://www.cosc.canterbury.ac.nz/greg.ewing/python/Pyrex/version/Doc/overview.html +.. _Cython: http://www.cython.org/ +.. 
_`code that Cython accepts`: http://www.cosc.canterbury.ac.nz/greg.ewing/python/Pyrex/version/Doc/overview.html How can I contribute? From scoder at codespeak.net Thu Dec 20 11:35:07 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Thu, 20 Dec 2007 11:35:07 +0100 (CET) Subject: [Lxml-checkins] r49949 - in lxml/trunk: . doc Message-ID: <20071220103507.8E54E169E36@codespeak.net> Author: scoder Date: Thu Dec 20 11:35:07 2007 New Revision: 49949 Modified: lxml/trunk/ (props changed) lxml/trunk/doc/FAQ.txt Log: r3150 at delle: sbehnel | 2007-12-20 11:34:35 +0100 FAQ fix Modified: lxml/trunk/doc/FAQ.txt ============================================================================== --- lxml/trunk/doc/FAQ.txt (original) +++ lxml/trunk/doc/FAQ.txt Thu Dec 20 11:35:07 2007 @@ -394,12 +394,11 @@ from disk and memory, as long as you use either the default parser (which is replicated for each thread) or create a parser for each thread yourself. lxml also allows concurrency during validation (RelaxNG and XMLSchema) and XSL -transformation. You can share RelaxNG, XMLSchema and XSLT objects between -threads. While you can also share parsers between threads, this will -serialize the access to each of them, so it is better to copy() parsers or to -use the default parser. Note that access to the XML() and HTML() functions is -always serialized. If you need to parse concurrently from strings, use -``parse()`` with ``StringIO`` or pass a separate parser to these functions. +transformation. You can share RelaxNG, XMLSchema and (with restrictions) XSLT +objects between threads. While you can also share parsers between threads, +this will serialize the access to each of them, so it is better to ``copy()`` +parsers or to just use the default parser (which is automatically copied for +each thread). Due to the way libxslt handles threading, concurrent access to stylesheets is currently only possible if it was parsed in the main thread. 
Parsing and From scoder at codespeak.net Thu Dec 20 11:35:11 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Thu, 20 Dec 2007 11:35:11 +0100 (CET) Subject: [Lxml-checkins] r49950 - in lxml/trunk: . benchmark Message-ID: <20071220103511.619B9169E24@codespeak.net> Author: scoder Date: Thu Dec 20 11:35:10 2007 New Revision: 49950 Modified: lxml/trunk/ (props changed) lxml/trunk/benchmark/bench_etree.py Log: r3151 at delle: sbehnel | 2007-12-20 11:34:50 +0100 API usage fix Modified: lxml/trunk/benchmark/bench_etree.py ============================================================================== --- lxml/trunk/benchmark/bench_etree.py (original) +++ lxml/trunk/benchmark/bench_etree.py Thu Dec 20 11:35:10 2007 @@ -37,24 +37,24 @@ @with_attributes(True, False) @with_text(text=True, utext=True) def bench_tostring_utf8(self, root): - self.etree.tostring(root, 'UTF-8') + self.etree.tostring(root, encoding='UTF-8') @with_attributes(True, False) @with_text(text=True, utext=True) def bench_tostring_utf16(self, root): - self.etree.tostring(root, 'UTF-16') + self.etree.tostring(root, encoding='UTF-16') @with_attributes(True, False) @with_text(text=True, utext=True) def bench_tostring_utf8_unicode_XML(self, root): - xml = unicode(self.etree.tostring(root, 'UTF-8'), 'UTF-8') + xml = unicode(self.etree.tostring(root, encoding='UTF-8'), 'UTF-8') self.etree.XML(xml) @with_attributes(True, False) @with_text(text=True, utext=True) def bench_write_utf8_parse_stringIO(self, root): f = StringIO() - self.etree.ElementTree(root).write(f, 'UTF-8') + self.etree.ElementTree(root).write(f, encoding='UTF-8') f.seek(0) self.etree.parse(f) From scoder at codespeak.net Thu Dec 20 17:32:00 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Thu, 20 Dec 2007 17:32:00 +0100 (CET) Subject: [Lxml-checkins] r49952 - in lxml/trunk: . 
src/lxml Message-ID: <20071220163200.ED277168465@codespeak.net> Author: scoder Date: Thu Dec 20 17:31:58 2007 New Revision: 49952 Modified: lxml/trunk/ (props changed) lxml/trunk/src/lxml/lxml.etree.pyx Log: r3155 at delle: sbehnel | 2007-12-20 13:30:48 +0100 doc fix Modified: lxml/trunk/src/lxml/lxml.etree.pyx ============================================================================== --- lxml/trunk/src/lxml/lxml.etree.pyx (original) +++ lxml/trunk/src/lxml/lxml.etree.pyx Thu Dec 20 17:31:58 2007 @@ -1950,7 +1950,7 @@ tree it traverses is modified during iteration. """ # we keep Python references here to control GC - # keep next node to return and a depth counter in the tree + # keep next node to return and the (s)top node cdef _Element _next_node cdef _Element _top_node def __init__(self, _Element node not None, tag=None, *, inclusive=True): From scoder at codespeak.net Thu Dec 20 17:32:04 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Thu, 20 Dec 2007 17:32:04 +0100 (CET) Subject: [Lxml-checkins] r49953 - in lxml/trunk: . 
benchmark Message-ID: <20071220163204.8DE711684F2@codespeak.net> Author: scoder Date: Thu Dec 20 17:32:03 2007 New Revision: 49953 Modified: lxml/trunk/ (props changed) lxml/trunk/benchmark/benchbase.py Log: r3156 at delle: sbehnel | 2007-12-20 13:31:26 +0100 prefer locally installed ET/cET in benchmark imports Modified: lxml/trunk/benchmark/benchbase.py ============================================================================== --- lxml/trunk/benchmark/benchbase.py (original) +++ lxml/trunk/benchmark/benchbase.py Thu Dec 20 17:32:03 2007 @@ -458,11 +458,11 @@ except ValueError: pass try: - import xml.etree.cElementTree as cET + import cElementTree as cET _etrees.append(cET) except ImportError: try: - import cElementTree as cET + import xml.etree.cElementTree as cET _etrees.append(cET) except ImportError: pass @@ -474,11 +474,11 @@ pass else: try: - from xml.etree import ElementTree as ET + from elementtree import ElementTree as ET _etrees.append(ET) except ImportError: try: - from elementtree import ElementTree as ET + from xml.etree import ElementTree as ET _etrees.append(ET) except ImportError: pass From scoder at codespeak.net Thu Dec 20 17:32:07 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Thu, 20 Dec 2007 17:32:07 +0100 (CET) Subject: [Lxml-checkins] r49954 - in lxml/trunk: . 
src/lxml Message-ID: <20071220163207.D83D11684F2@codespeak.net> Author: scoder Date: Thu Dec 20 17:32:07 2007 New Revision: 49954 Modified: lxml/trunk/ (props changed) lxml/trunk/src/lxml/lxml.etree.pyx Log: r3157 at delle: sbehnel | 2007-12-20 13:36:39 +0100 fix: release lock in corner case Modified: lxml/trunk/src/lxml/lxml.etree.pyx ============================================================================== --- lxml/trunk/src/lxml/lxml.etree.pyx (original) +++ lxml/trunk/src/lxml/lxml.etree.pyx Thu Dec 20 17:32:07 2007 @@ -1232,6 +1232,8 @@ result = element_class() if hasProxy(c_node): # prevent re-entry race condition - we just called into Python + if config.ENABLE_THREADING: + python.PyThread_release_lock(ELEMENT_CREATION_LOCK) result._c_node = NULL return getProxy(c_node) result._doc = doc From scoder at codespeak.net Thu Dec 20 17:32:10 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Thu, 20 Dec 2007 17:32:10 +0100 (CET) Subject: [Lxml-checkins] r49955 - in lxml/trunk: . benchmark Message-ID: <20071220163210.F0C0B168521@codespeak.net> Author: scoder Date: Thu Dec 20 17:32:10 2007 New Revision: 49955 Modified: lxml/trunk/ (props changed) lxml/trunk/benchmark/bench_xpath.py Log: r3158 at delle: sbehnel | 2007-12-20 17:28:08 +0100 API usage fix Modified: lxml/trunk/benchmark/bench_xpath.py ============================================================================== --- lxml/trunk/benchmark/bench_xpath.py (original) +++ lxml/trunk/benchmark/bench_xpath.py Thu Dec 20 17:32:10 2007 @@ -61,7 +61,7 @@ self.etree.FunctionNamespace("testns")["t"] = return_child try: - xpath = self.etree.XPath("test:t(.)", {"test":"testns"}) + xpath = self.etree.XPath("test:t(.)", namespaces={"test":"testns"}) for child in children: xpath(child) finally: From scoder at codespeak.net Thu Dec 20 17:32:14 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Thu, 20 Dec 2007 17:32:14 +0100 (CET) Subject: [Lxml-checkins] r49956 - in lxml/trunk: . 
doc Message-ID: <20071220163214.91A5216852D@codespeak.net> Author: scoder Date: Thu Dec 20 17:32:14 2007 New Revision: 49956 Modified: lxml/trunk/ (props changed) lxml/trunk/doc/performance.txt Log: r3159 at delle: sbehnel | 2007-12-20 17:31:49 +0100 updated benchmark results for lxml 2.0 Modified: lxml/trunk/doc/performance.txt ============================================================================== --- lxml/trunk/doc/performance.txt (original) +++ lxml/trunk/doc/performance.txt Thu Dec 20 17:32:14 2007 @@ -66,10 +66,12 @@ a specific part of the API yourself, please consider sending it to the lxml mailing list. -The timings cited below compare lxml 1.3 (with libxml2 2.6.27) to the -ElementTree and cElementTree versions shipped with CPython 2.5 (based on -ElementTree 1.2.6). They were run single-threaded on a 1.8GHz Intel Core Duo -machine under Ubuntu Linux 7.04 (Feisty). +The timings cited below compare lxml 2.0alpha (with libxml2 2.6.30) to +the December 2007 SVN trunk versions of ElementTree (1.3) and +cElementTree (1.2.7). They were run single-threaded on a 1.8GHz Intel +Core Duo machine under Ubuntu Linux 7.10 (Gutsy). The C libraries +were compiled with the same platform specific optimisation flags. The +Python interpreter (2.5.1) was used as provided by the distribution. .. _`bench_etree.py`: http://codespeak.net/svn/lxml/branch/lxml-1.3/benchmark/bench_etree.py .. _`bench_xpath.py`: http://codespeak.net/svn/lxml/branch/lxml-1.3/benchmark/bench_xpath.py @@ -103,73 +105,88 @@ Parsing and Serialising ======================= -These are areas where lxml excels. The reason is that both parts are executed -entirely at the C level, without major interaction with Python code. The -results are rather impressive. 
Compared to cElementTree, lxml is about 20 to -40 times faster on serialisation:: - - lxe: tostring_utf16 (SATR T1) 21.9206 msec/pass - cET: tostring_utf16 (SATR T1) 461.9428 msec/pass - ET : tostring_utf16 (SATR T1) 486.8946 msec/pass - - lxe: tostring_utf16 (UATR T1) 22.7508 msec/pass - cET: tostring_utf16 (UATR T1) 526.3446 msec/pass - ET : tostring_utf16 (UATR T1) 496.0767 msec/pass - - lxe: tostring_utf16 (S-TR T2) 23.8452 msec/pass - cET: tostring_utf16 (S-TR T2) 537.9200 msec/pass - ET : tostring_utf16 (S-TR T2) 504.4273 msec/pass - - lxe: tostring_utf8 (S-TR T2) 18.2550 msec/pass - cET: tostring_utf8 (S-TR T2) 528.3908 msec/pass - ET : tostring_utf8 (S-TR T2) 549.7071 msec/pass - - lxe: tostring_utf8 (U-TR T3) 2.5497 msec/pass - cET: tostring_utf8 (U-TR T3) 49.8495 msec/pass - ET : tostring_utf8 (U-TR T3) 62.6927 msec/pass - -For parsing, the difference between the libraries is smaller. The (c)ET -libraries use the expat parser, which is known to be extremely fast:: - - lxe: parse_stringIO (SAXR T1) 150.2380 msec/pass - cET: parse_stringIO (SAXR T1) 25.9311 msec/pass - ET : parse_stringIO (SAXR T1) 222.9431 msec/pass - - lxe: parse_stringIO (S-XR T3) 5.9490 msec/pass - cET: parse_stringIO (S-XR T3) 5.4519 msec/pass - ET : parse_stringIO (S-XR T3) 76.4120 msec/pass - - lxe: parse_stringIO (UAXR T3) 29.3601 msec/pass - cET: parse_stringIO (UAXR T3) 28.9941 msec/pass - ET : parse_stringIO (UAXR T3) 163.5361 msec/pass - -The expat parser allows cET to be up to 80% faster than lxml on plain parser -performance. Similar timings can be observed for the ``iterparse()`` -function. 
However, if you take a complete input-output cycle, the numbers -will look similar to these:: - - lxe: write_utf8_parse_stringIO (S-TR T1) 166.3210 msec/pass - cET: write_utf8_parse_stringIO (S-TR T1) 581.2099 msec/pass - ET : write_utf8_parse_stringIO (S-TR T1) 803.5331 msec/pass - - lxe: write_utf8_parse_stringIO (UATR T2) 184.4249 msec/pass - cET: write_utf8_parse_stringIO (UATR T2) 671.5119 msec/pass - ET : write_utf8_parse_stringIO (UATR T2) 924.3481 msec/pass - - lxe: write_utf8_parse_stringIO (S-TR T3) 9.1329 msec/pass - cET: write_utf8_parse_stringIO (S-TR T3) 77.9850 msec/pass - ET : write_utf8_parse_stringIO (S-TR T3) 157.0492 msec/pass - - lxe: write_utf8_parse_stringIO (SATR T4) 1.3900 msec/pass - cET: write_utf8_parse_stringIO (SATR T4) 12.6081 msec/pass - ET : write_utf8_parse_stringIO (SATR T4) 16.2580 msec/pass +Serialisation is an area where lxml excels. The reason is that it +executes entirely at the C level, without any interaction with Python +code. The results are rather impressive, especially for UTF-8, which +is native to libxml2. 
While 20 to 40 times faster than (c)ElementTree +1.2, lxml is still more than 5 times as fast as the much improved +ElementTree 1.3:: + + lxe: tostring_utf16 (SATR T1) 23.4821 msec/pass + cET: tostring_utf16 (SATR T1) 129.8430 msec/pass + ET : tostring_utf16 (SATR T1) 136.1301 msec/pass + + lxe: tostring_utf16 (UATR T1) 23.4859 msec/pass + cET: tostring_utf16 (UATR T1) 130.1570 msec/pass + ET : tostring_utf16 (UATR T1) 136.3101 msec/pass + + lxe: tostring_utf16 (S-TR T2) 24.2729 msec/pass + cET: tostring_utf16 (S-TR T2) 136.9388 msec/pass + ET : tostring_utf16 (S-TR T2) 143.9550 msec/pass + + lxe: tostring_utf8 (S-TR T2) 18.4860 msec/pass + cET: tostring_utf8 (S-TR T2) 137.0859 msec/pass + ET : tostring_utf8 (S-TR T2) 144.3110 msec/pass + + lxe: tostring_utf8 (U-TR T3) 2.7399 msec/pass + cET: tostring_utf8 (U-TR T3) 52.1040 msec/pass + ET : tostring_utf8 (U-TR T3) 53.1070 msec/pass + +For parsing, on the other hand, the advantage is clearly with +cElementTree. The (c)ET libraries use a very thin layer on top of the +expat parser, which is known to be extremely fast:: + + lxe: parse_stringIO (SAXR T1) 144.1851 msec/pass + cET: parse_stringIO (SAXR T1) 14.4269 msec/pass + ET : parse_stringIO (SAXR T1) 245.9190 msec/pass + + lxe: parse_stringIO (S-XR T3) 5.6100 msec/pass + cET: parse_stringIO (S-XR T3) 5.3229 msec/pass + ET : parse_stringIO (S-XR T3) 82.4831 msec/pass + + lxe: parse_stringIO (UAXR T3) 23.4420 msec/pass + cET: parse_stringIO (UAXR T3) 30.2689 msec/pass + ET : parse_stringIO (UAXR T3) 165.7169 msec/pass + +While about as fast for smaller documents, the expat parser allows cET +to be up to 10 times faster than lxml on plain parser performance for +large input documents. 
Similar timings can be observed for the +``iterparse()`` function:: + + lxe: iterparse_stringIO (SAXR T1) 160.3689 msec/pass + cET: iterparse_stringIO (SAXR T1) 19.1891 msec/pass + ET : iterparse_stringIO (SAXR T1) 274.8971 msec/pass + + lxe: iterparse_stringIO (UAXR T3) 24.9629 msec/pass + cET: iterparse_stringIO (UAXR T3) 31.7740 msec/pass + ET : iterparse_stringIO (UAXR T3) 173.8000 msec/pass + +However, if you benchmark the complete round-trip of a serialise-parse +cycle, the numbers will look similar to these:: + + lxe: write_utf8_parse_stringIO (S-TR T1) 160.0718 msec/pass + cET: write_utf8_parse_stringIO (S-TR T1) 207.6778 msec/pass + ET : write_utf8_parse_stringIO (S-TR T1) 450.2120 msec/pass + + lxe: write_utf8_parse_stringIO (UATR T2) 173.5830 msec/pass + cET: write_utf8_parse_stringIO (UATR T2) 253.0849 msec/pass + ET : write_utf8_parse_stringIO (UATR T2) 519.2261 msec/pass + + lxe: write_utf8_parse_stringIO (S-TR T3) 8.4269 msec/pass + cET: write_utf8_parse_stringIO (S-TR T3) 75.7639 msec/pass + ET : write_utf8_parse_stringIO (S-TR T3) 156.1930 msec/pass + + lxe: write_utf8_parse_stringIO (SATR T4) 1.2100 msec/pass + cET: write_utf8_parse_stringIO (SATR T4) 6.4859 msec/pass + ET : write_utf8_parse_stringIO (SATR T4) 9.9051 msec/pass For applications that require a high parser throughput and do little -serialization, cET is the best choice. Also for iterparse applications that -extract small amounts of data from large XML data sets. If it comes to -round-trip performance, however, lxml tends to be 3-4 times faster in -total. So, whenever the input documents are not considerably bigger than the -output, lxml is the clear winner. +serialization, cET is the best choice. Also for iterparse +applications that extract small amounts of data from large XML data +sets. If it comes to round-trip performance, however, lxml tends to +be between 30% and multiple times faster in total. 
So, whenever the +input documents are not considerably bigger than the output, lxml is +the clear winner. The ElementTree API @@ -182,23 +199,23 @@ restructuring. This can be seen from the tree setup times of the benchmark (given in seconds):: - lxe: -- S- U- -A SA UA - T1: 0.1181 0.1080 0.1074 0.1088 0.1087 0.1099 - T2: 0.1103 0.1109 0.1164 0.1241 0.1203 0.1231 - T3: 0.0297 0.0309 0.0297 0.0716 0.0704 0.0703 - T4: 0.0005 0.0004 0.0004 0.0014 0.0014 0.0014 - cET: -- S- U- -A SA UA - T1: 0.0290 0.0271 0.0275 0.0297 0.0273 0.0274 - T2: 0.0280 0.0280 0.0281 0.0285 0.0283 0.0286 - T3: 0.0071 0.0072 0.0071 0.0113 0.0096 0.0096 + lxe: -- S- U- -A SA UA + T1: 0.0914 0.0875 0.0872 0.0892 0.0882 0.0900 + T2: 0.0894 0.0897 0.0892 0.0988 0.0978 0.0974 + T3: 0.0219 0.0194 0.0189 0.0570 0.0570 0.0573 + T4: 0.0004 0.0004 0.0004 0.0012 0.0013 0.0012 + cET: -- S- U- -A SA UA + T1: 0.0272 0.0264 0.0267 0.0268 0.0261 0.0265 + T2: 0.0280 0.0274 0.0273 0.0273 0.0276 0.0275 + T3: 0.0065 0.0066 0.0065 0.0111 0.0088 0.0088 T4: 0.0001 0.0001 0.0001 0.0001 0.0001 0.0001 - ET : -- S- U- -A SA UA - T1: 0.1362 0.1985 0.2300 0.1344 0.2672 0.1335 - T2: 0.3107 0.1386 0.3581 0.3886 0.1388 0.4277 - T3: 0.0334 0.0332 0.0320 0.0367 0.3769 0.0375 - T4: 0.0006 0.0005 0.0008 0.0007 0.0007 0.0006 + ET : -- S- U- -A SA UA + T1: 0.1302 0.1903 0.2208 0.1265 0.2542 0.1267 + T2: 0.2994 0.1301 0.3402 0.3746 0.1326 0.4170 + T3: 0.0301 0.0310 0.0302 0.0348 0.3654 0.0349 + T4: 0.0006 0.0005 0.0008 0.0006 0.0007 0.0006 -While lxml is still faster than ET in most cases (30-60%), cET can be up to +While lxml is still faster than ET in most cases (10-70%), cET can be up to three times faster than lxml here. One of the reasons is that lxml must additionally discard the created Python elements after their use, when they are no longer referenced. 
ET and cET represent the tree itself through these @@ -208,36 +225,41 @@ Child access ------------ -The same reason makes operations like ``getchildren()`` more costly in lxml. -Where ET and cET can quickly create a shallow copy of their list of children, -lxml has to create a Python object for each child and collect them in a list:: - - lxe: root_getchildren (--TR T2) 0.1960 msec/pass - cET: root_getchildren (--TR T2) 0.0150 msec/pass - ET : root_getchildren (--TR T2) 0.0091 msec/pass - -When accessing single children, however, e.g. by index, this handicap is -negligible:: - - lxe: first_child (--TR T2) 0.2289 msec/pass - cET: first_child (--TR T2) 0.2048 msec/pass - ET : first_child (--TR T2) 0.9291 msec/pass - - lxe: last_child (--TR T1) 0.2310 msec/pass - cET: last_child (--TR T1) 0.2148 msec/pass - ET : last_child (--TR T1) 0.9191 msec/pass - -... unless you add the time to find a child index in a bigger list, as ET and -cET use Python lists here, which are based on arrays. The data structure used -by libxml2 is a linked tree, and thus, a linked list of children:: - - lxe: middle_child (--TR T1) 0.2759 msec/pass - cET: middle_child (--TR T1) 0.2069 msec/pass - ET : middle_child (--TR T1) 0.9291 msec/pass - - lxe: middle_child (--TR T2) 1.7111 msec/pass - cET: middle_child (--TR T2) 0.2089 msec/pass - ET : middle_child (--TR T2) 0.9360 msec/pass +The same reason makes operations like collecting children as in +``list(element)`` more costly in lxml. 
Where ET and cET can quickly +create a shallow copy of their list of children, lxml has to create a +Python object for each child and collect them in a list:: + + lxe: root_list_children (--TR T1) 0.0169 msec/pass + cET: root_list_children (--TR T1) 0.0081 msec/pass + ET : root_list_children (--TR T1) 0.0541 msec/pass + + lxe: root_list_children (--TR T2) 0.2339 msec/pass + cET: root_list_children (--TR T2) 0.0319 msec/pass + ET : root_list_children (--TR T2) 0.4420 msec/pass + +This handicap is also visible when accessing single children:: + + lxe: first_child (--TR T2) 0.3228 msec/pass + cET: first_child (--TR T2) 0.2170 msec/pass + ET : first_child (--TR T2) 0.9968 msec/pass + + lxe: last_child (--TR T1) 0.3269 msec/pass + cET: last_child (--TR T1) 0.2291 msec/pass + ET : last_child (--TR T1) 0.9830 msec/pass + +... unless you also add the time to find a child index in a bigger +list. ET and cET use Python lists here, which are based on arrays. +The data structure used by libxml2 is a linked tree, and thus, a +linked list of children:: + + lxe: middle_child (--TR T1) 0.3638 msec/pass + cET: middle_child (--TR T1) 0.2229 msec/pass + ET : middle_child (--TR T1) 1.0030 msec/pass + + lxe: middle_child (--TR T2) 2.1780 msec/pass + cET: middle_child (--TR T2) 0.2229 msec/pass + ET : middle_child (--TR T2) 0.9930 msec/pass Element creation @@ -247,21 +269,21 @@ in. 
This results in a major performance difference for creating independent Elements that end up in independently created documents:: - lxe: create_elements (--TC T2) 3.7301 msec/pass - cET: create_elements (--TC T2) 0.1960 msec/pass - ET : create_elements (--TC T2) 1.4279 msec/pass + lxe: create_elements (--TC T2) 3.1691 msec/pass + cET: create_elements (--TC T2) 0.1929 msec/pass + ET : create_elements (--TC T2) 1.3590 msec/pass Therefore, it is always preferable to create Elements for the document they are supposed to end up in, either as SubElements of an Element or using the explicit ``Element.makeelement()`` call:: - lxe: makeelement (--TC T2) 2.3680 msec/pass - cET: makeelement (--TC T2) 0.3128 msec/pass - ET : makeelement (--TC T2) 1.6940 msec/pass - - lxe: create_subelements (--TC T2) 2.2051 msec/pass - cET: create_subelements (--TC T2) 0.2370 msec/pass - ET : create_subelements (--TC T2) 3.2189 msec/pass + lxe: makeelement (--TC T2) 2.2941 msec/pass + cET: makeelement (--TC T2) 0.3211 msec/pass + ET : makeelement (--TC T2) 1.6358 msec/pass + + lxe: create_subelements (--TC T2) 2.1169 msec/pass + cET: create_subelements (--TC T2) 0.2351 msec/pass + ET : create_subelements (--TC T2) 3.2270 msec/pass So, if the main performance bottleneck of an application is creating large XML trees in memory through calls to Element and SubElement, cET is the best @@ -278,13 +300,13 @@ The following benchmark appends all root children of the second tree to the root of the first tree:: - lxe: append_from_document (--TR T1,T2) 4.3468 msec/pass - cET: append_from_document (--TR T1,T2) 0.2608 msec/pass - ET : append_from_document (--TR T1,T2) 1.2310 msec/pass - - lxe: append_from_document (--TR T3,T4) 0.0679 msec/pass - cET: append_from_document (--TR T3,T4) 0.0148 msec/pass - ET : append_from_document (--TR T3,T4) 0.0880 msec/pass + lxe: append_from_document (--TR T1,T2) 3.8681 msec/pass + cET: append_from_document (--TR T1,T2) 0.2699 msec/pass + ET : append_from_document (--TR 
T1,T2) 1.2650 msec/pass + + lxe: append_from_document (--TR T3,T4) 0.0570 msec/pass + cET: append_from_document (--TR T3,T4) 0.0169 msec/pass + ET : append_from_document (--TR T3,T4) 0.0820 msec/pass Although these are fairly small numbers compared to parsing, this easily shows the different performance classes for lxml and (c)ET. Where the latter do not @@ -295,15 +317,22 @@ This difference is not always as visible, but applies to most parts of the API, like inserting newly created elements:: - lxe: insert_from_document (--TR T1,T2) 6.3150 msec/pass - cET: insert_from_document (--TR T1,T2) 0.4039 msec/pass - ET : insert_from_document (--TR T1,T2) 1.4770 msec/pass + lxe: insert_from_document (--TR T1,T2) 5.8019 msec/pass + cET: insert_from_document (--TR T1,T2) 0.4041 msec/pass + ET : insert_from_document (--TR T1,T2) 1.4789 msec/pass -Or replacing the child slice by a new element:: +or replacing the child slice by a newly created element:: - lxe: replace_children_element (--TC T1) 0.2608 msec/pass + lxe: replace_children_element (--TC T1) 0.2520 msec/pass cET: replace_children_element (--TC T1) 0.0238 msec/pass - ET : replace_children_element (--TC T1) 0.1628 msec/pass + ET : replace_children_element (--TC T1) 0.1600 msec/pass + +as opposed to replacing the slice with an existing element from the +same document:: + + lxe: replace_children (--TC T1) 0.0188 msec/pass + cET: replace_children (--TC T1) 0.0119 msec/pass + ET : replace_children (--TC T1) 0.0739 msec/pass You should keep this difference in mind when you merge very large trees. 
@@ -313,17 +342,17 @@ Deep copying a tree is fast in lxml:: - lxe: deepcopy_all (--TR T1) 11.0400 msec/pass - cET: deepcopy_all (--TR T1) 119.6141 msec/pass - ET : deepcopy_all (--TR T1) 451.2160 msec/pass - - lxe: deepcopy_all (-ATR T2) 13.5410 msec/pass - cET: deepcopy_all (-ATR T2) 135.2482 msec/pass - ET : deepcopy_all (-ATR T2) 476.1350 msec/pass - - lxe: deepcopy_all (S-TR T3) 4.2889 msec/pass - cET: deepcopy_all (S-TR T3) 36.0429 msec/pass - ET : deepcopy_all (S-TR T3) 113.4322 msec/pass + lxe: deepcopy_all (--TR T1) 10.9420 msec/pass + cET: deepcopy_all (--TR T1) 120.6188 msec/pass + ET : deepcopy_all (--TR T1) 902.6880 msec/pass + + lxe: deepcopy_all (-ATR T2) 12.5830 msec/pass + cET: deepcopy_all (-ATR T2) 136.9810 msec/pass + ET : deepcopy_all (-ATR T2) 944.2801 msec/pass + + lxe: deepcopy_all (S-TR T3) 4.1170 msec/pass + cET: deepcopy_all (S-TR T3) 36.1221 msec/pass + ET : deepcopy_all (S-TR T3) 221.6041 msec/pass So, for example, if you have a database-like scenario where you parse in a large tree and then search and copy independent subtrees from it for further @@ -338,39 +367,39 @@ especially if few elements are of interest or the target element tag name is known, lxml is a good choice:: - lxe: getiterator_all (--TR T2) 6.4790 msec/pass - cET: getiterator_all (--TR T2) 28.2831 msec/pass - ET : getiterator_all (--TR T2) 26.0720 msec/pass - - lxe: getiterator_islice (--TR T2) 0.0892 msec/pass - cET: getiterator_islice (--TR T2) 0.2460 msec/pass - ET : getiterator_islice (--TR T2) 26.6550 msec/pass - - lxe: getiterator_tag (--TR T2) 0.3850 msec/pass - cET: getiterator_tag (--TR T2) 9.3720 msec/pass - ET : getiterator_tag (--TR T2) 22.8221 msec/pass - - lxe: getiterator_tag_all (--TR T2) 0.7222 msec/pass - cET: getiterator_tag_all (--TR T2) 27.2939 msec/pass - ET : getiterator_tag_all (--TR T2) 22.8271 msec/pass + lxe: getiterator_all (--TR T1) 6.0360 msec/pass + cET: getiterator_all (--TR T1) 39.9489 msec/pass + ET : getiterator_all (--TR T1) 23.0000 
msec/pass + + lxe: getiterator_islice (--TR T2) 0.0851 msec/pass + cET: getiterator_islice (--TR T2) 0.3440 msec/pass + ET : getiterator_islice (--TR T2) 0.2429 msec/pass + + lxe: getiterator_tag (--TR T2) 0.3290 msec/pass + cET: getiterator_tag (--TR T2) 14.1001 msec/pass + ET : getiterator_tag (--TR T2) 7.4241 msec/pass + + lxe: getiterator_tag_all (--TR T2) 0.7281 msec/pass + cET: getiterator_tag_all (--TR T2) 40.7901 msec/pass + ET : getiterator_tag_all (--TR T2) 21.0390 msec/pass This translates directly into similar timings for ``Element.findall()``:: - lxe: findall (--TR T2) 6.8321 msec/pass - cET: findall (--TR T2) 28.8639 msec/pass - ET : findall (--TR T2) 27.1060 msec/pass - - lxe: findall (--TR T3) 1.3590 msec/pass - cET: findall (--TR T3) 8.9881 msec/pass - ET : findall (--TR T3) 6.4890 msec/pass - - lxe: findall_tag (--TR T2) 0.9229 msec/pass - cET: findall_tag (--TR T2) 27.2651 msec/pass - ET : findall_tag (--TR T2) 22.7208 msec/pass - - lxe: findall_tag (--TR T3) 0.1700 msec/pass - cET: findall_tag (--TR T3) 6.4540 msec/pass - ET : findall_tag (--TR T3) 5.4770 msec/pass + lxe: findall (--TR T2) 8.2440 msec/pass + cET: findall (--TR T2) 44.5340 msec/pass + ET : findall (--TR T2) 27.1149 msec/pass + + lxe: findall (--TR T3) 1.7269 msec/pass + cET: findall (--TR T3) 12.9611 msec/pass + ET : findall (--TR T3) 8.6131 msec/pass + + lxe: findall_tag (--TR T2) 0.8020 msec/pass + cET: findall_tag (--TR T2) 40.6358 msec/pass + ET : findall_tag (--TR T2) 21.4581 msec/pass + + lxe: findall_tag (--TR T3) 0.2341 msec/pass + cET: findall_tag (--TR T3) 9.6831 msec/pass + ET : findall_tag (--TR T3) 5.2109 msec/pass Note that all three libraries currently use the same Python implementation for ``findall()``, except for their native tree iterator. @@ -386,49 +415,52 @@ of the lxml API you use. 
The most straight forward way is to call the ``xpath()`` method on an Element or ElementTree:: - lxe: xpath_method (--TC T1) 1.0180 msec/pass - lxe: xpath_method (--TC T2) 20.3521 msec/pass - lxe: xpath_method (--TC T3) 0.1259 msec/pass - lxe: xpath_method (--TC T4) 1.0169 msec/pass + lxe: xpath_method (--TC T1) 1.8251 msec/pass + lxe: xpath_method (--TC T2) 23.3159 msec/pass + lxe: xpath_method (--TC T3) 0.1378 msec/pass + lxe: xpath_method (--TC T4) 1.1270 msec/pass This is well suited for testing and when the XPath expressions are as diverse as the trees they are called on. However, if you have a single XPath expression that you want to apply to a larger number of different elements, the ``XPath`` class is the most efficient way to do it:: - lxe: xpath_class (--TC T1) 0.1891 msec/pass - lxe: xpath_class (--TC T2) 3.0179 msec/pass - lxe: xpath_class (--TC T3) 0.0570 msec/pass - lxe: xpath_class (--TC T4) 0.1910 msec/pass + lxe: xpath_class (--TC T1) 0.6981 msec/pass + lxe: xpath_class (--TC T2) 3.6111 msec/pass + lxe: xpath_class (--TC T3) 0.0591 msec/pass + lxe: xpath_class (--TC T4) 0.1979 msec/pass Note that this still allows you to use variables in the expression, so you can parse it once and then adapt it through variables at call time. 
In other cases, where you have a fixed Element or ElementTree and want to run different expressions on it, you should consider the ``XPathEvaluator``:: - lxe: xpath_element (--TR T1) 0.4089 msec/pass - lxe: xpath_element (--TR T2) 5.9960 msec/pass - lxe: xpath_element (--TR T3) 0.1230 msec/pass - lxe: xpath_element (--TR T4) 0.3440 msec/pass + lxe: xpath_element (--TR T1) 0.4342 msec/pass + lxe: xpath_element (--TR T2) 11.9958 msec/pass + lxe: xpath_element (--TR T3) 0.1690 msec/pass + lxe: xpath_element (--TR T4) 0.3510 msec/pass While it looks slightly slower, creating an XPath object for each of the expressions generates a much higher overhead here:: - lxe: xpath_class_repeat (--TC T1) 1.0259 msec/pass - lxe: xpath_class_repeat (--TC T2) 20.4861 msec/pass - lxe: xpath_class_repeat (--TC T3) 0.1280 msec/pass - lxe: xpath_class_repeat (--TC T4) 1.0269 msec/pass + lxe: xpath_class_repeat (--TC T1) 1.7619 msec/pass + lxe: xpath_class_repeat (--TC T2) 21.9102 msec/pass + lxe: xpath_class_repeat (--TC T3) 0.1330 msec/pass + lxe: xpath_class_repeat (--TC T4) 1.0631 msec/pass A longer example ================ -A while ago, Uche Ogbuji posted a `benchmark proposal`_ that would read in a -3MB XML version of the `Old Testament`_ of the Bible and look for the word -*begat* in all verses. Apparently, it is contained in 120 out of almost 24000 -verses. This is easy to implement in ElementTree using ``findall()``. -However, the fastest way to do this is obviously ``iterparse()``, as most of -the data is not of any interest. +... based on lxml 1.3. + +A while ago, Uche Ogbuji posted a `benchmark proposal`_ that would +read in a 3MB XML version of the `Old Testament`_ of the Bible and +look for the word *begat* in all verses. Apparently, it is contained +in 120 out of almost 24000 verses. This is easy to implement in +ElementTree using ``findall()``. 
However, the fastest and most memory +friendly way to do this is obviously ``iterparse()``, as most of the +data is not of any interest. .. _`benchmark proposal`: http://www.onlamp.com/pub/wlg/6291 .. _`Old Testament`: http://www.ibiblio.org/bosak/xml/eg/religion.2.00.xml.zip @@ -571,21 +603,21 @@ tree. It avoids step-by-step Python element instantiations along the path, which can substantially improve the access time:: - lxe: attribute (--TR T1) 10.6189 msec/pass - lxe: attribute (--TR T2) 53.7431 msec/pass - lxe: attribute (--TR T4) 10.3359 msec/pass - - lxe: objectpath (--TR T1) 5.8351 msec/pass - lxe: objectpath (--TR T2) 48.1579 msec/pass - lxe: objectpath (--TR T4) 5.6930 msec/pass - - lxe: attributes_deep (--TR T1) 58.7430 msec/pass - lxe: attributes_deep (--TR T2) 63.0901 msec/pass - lxe: attributes_deep (--TR T4) 17.4620 msec/pass - - lxe: objectpath_deep (--TR T1) 52.1719 msec/pass - lxe: objectpath_deep (--TR T2) 52.9201 msec/pass - lxe: objectpath_deep (--TR T4) 7.5650 msec/pass + lxe: attribute (--TR T1) 9.8128 msec/pass + lxe: attribute (--TR T2) 53.2899 msec/pass + lxe: attribute (--TR T4) 9.6800 msec/pass + + lxe: objectpath (--TR T1) 5.4898 msec/pass + lxe: objectpath (--TR T2) 48.4819 msec/pass + lxe: objectpath (--TR T4) 5.3761 msec/pass + + lxe: attributes_deep (--TR T1) 56.3290 msec/pass + lxe: attributes_deep (--TR T2) 62.4361 msec/pass + lxe: attributes_deep (--TR T4) 15.8000 msec/pass + + lxe: objectpath_deep (--TR T1) 49.0060 msec/pass + lxe: objectpath_deep (--TR T2) 52.5169 msec/pass + lxe: objectpath_deep (--TR T4) 7.1371 msec/pass Note, however, that parsing ObjectPath expressions is not for free either, so this is most effective for frequently accessing the same element. 
@@ -611,17 +643,17 @@ subtrees and elements) to cache, you can trade memory usage against access speed:: - lxe: attribute_cached (--TR T1) 7.9739 msec/pass - lxe: attribute_cached (--TR T2) 50.9331 msec/pass - lxe: attribute_cached (--TR T4) 7.8540 msec/pass - - lxe: attributes_deep_cached (--TR T1) 51.1391 msec/pass - lxe: attributes_deep_cached (--TR T2) 55.7129 msec/pass - lxe: attributes_deep_cached (--TR T4) 10.7968 msec/pass - - lxe: objectpath_deep_cached (--TR T1) 47.6151 msec/pass - lxe: objectpath_deep_cached (--TR T2) 48.0802 msec/pass - lxe: objectpath_deep_cached (--TR T4) 4.0281 msec/pass + lxe: attribute_cached (--TR T1) 7.6170 msec/pass + lxe: attribute_cached (--TR T2) 50.7941 msec/pass + lxe: attribute_cached (--TR T4) 7.4880 msec/pass + + lxe: attributes_deep_cached (--TR T1) 49.9220 msec/pass + lxe: attributes_deep_cached (--TR T2) 55.9340 msec/pass + lxe: attributes_deep_cached (--TR T4) 10.0131 msec/pass + + lxe: objectpath_deep_cached (--TR T1) 44.9121 msec/pass + lxe: objectpath_deep_cached (--TR T2) 48.2371 msec/pass + lxe: objectpath_deep_cached (--TR T4) 3.9630 msec/pass Things to note: you cannot currently use ``weakref.WeakKeyDictionary`` objects for this as lxml's element objects do not support weak references (which are From scoder at codespeak.net Thu Dec 20 18:32:33 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Thu, 20 Dec 2007 18:32:33 +0100 (CET) Subject: [Lxml-checkins] r49957 - in lxml/trunk: . 
src/lxml Message-ID: <20071220173233.7F52C169EB3@codespeak.net> Author: scoder Date: Thu Dec 20 18:32:32 2007 New Revision: 49957 Modified: lxml/trunk/ (props changed) lxml/trunk/src/lxml/classlookup.pxi lxml/trunk/src/lxml/parser.pxi Log: r3165 at delle: sbehnel | 2007-12-20 17:57:48 +0100 deprecation in docs Modified: lxml/trunk/src/lxml/classlookup.pxi ============================================================================== --- lxml/trunk/src/lxml/classlookup.pxi (original) +++ lxml/trunk/src/lxml/classlookup.pxi Thu Dec 20 18:32:32 2007 @@ -299,7 +299,7 @@ LOOKUP_ELEMENT_CLASS = function def setElementClassLookup(ElementClassLookup lookup = None): - "Deprecated, use ``set_element_class_lookup(lookup)`` instead" + "@deprecated: use ``set_element_class_lookup(lookup)`` instead" set_element_class_lookup(lookup) def set_element_class_lookup(ElementClassLookup lookup = None): @@ -313,4 +313,4 @@ cdef ParserBasedElementClassLookup DEFAULT_ELEMENT_CLASS_LOOKUP DEFAULT_ELEMENT_CLASS_LOOKUP = ParserBasedElementClassLookup() -setElementClassLookup(DEFAULT_ELEMENT_CLASS_LOOKUP) +set_element_class_lookup(DEFAULT_ELEMENT_CLASS_LOOKUP) Modified: lxml/trunk/src/lxml/parser.pxi ============================================================================== --- lxml/trunk/src/lxml/parser.pxi (original) +++ lxml/trunk/src/lxml/parser.pxi Thu Dec 20 18:32:32 2007 @@ -658,7 +658,7 @@ return "libxml2 %d.%d.%d" % LIBXML_VERSION def setElementClassLookup(self, ElementClassLookup lookup = None): - "Deprecated, use ``parser.set_element_class_lookup(lookup)`` instead." + "@deprecated: use ``parser.set_element_class_lookup(lookup)`` instead." self.set_element_class_lookup(lookup) def set_element_class_lookup(self, ElementClassLookup lookup = None): @@ -1510,11 +1510,11 @@ __GLOBAL_PARSER_CONTEXT.setDefaultParser(__DEFAULT_XML_PARSER) def setDefaultParser(parser=None): - "Deprecated, please use set_default_parser instead." + "@deprecated: please use set_default_parser instead." 
set_default_parser(parser) def getDefaultParser(): - "Deprecated, please use get_default_parser instead." + "@deprecated: please use get_default_parser instead." return get_default_parser() def set_default_parser(_BaseParser parser=None): From scoder at codespeak.net Thu Dec 20 18:32:36 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Thu, 20 Dec 2007 18:32:36 +0100 (CET) Subject: [Lxml-checkins] r49958 - in lxml/trunk: . doc src/lxml Message-ID: <20071220173236.891B0169EB5@codespeak.net> Author: scoder Date: Thu Dec 20 18:32:36 2007 New Revision: 49958 Modified: lxml/trunk/ (props changed) lxml/trunk/doc/performance.txt lxml/trunk/src/lxml/classlookup.pxi Log: r3166 at delle: sbehnel | 2007-12-20 18:32:27 +0100 tiny speedup in element instantiation Modified: lxml/trunk/doc/performance.txt ============================================================================== --- lxml/trunk/doc/performance.txt (original) +++ lxml/trunk/doc/performance.txt Thu Dec 20 18:32:36 2007 @@ -203,7 +203,7 @@ T1: 0.0914 0.0875 0.0872 0.0892 0.0882 0.0900 T2: 0.0894 0.0897 0.0892 0.0988 0.0978 0.0974 T3: 0.0219 0.0194 0.0189 0.0570 0.0570 0.0573 - T4: 0.0004 0.0004 0.0004 0.0012 0.0013 0.0012 + T4: 0.0004 0.0003 0.0003 0.0012 0.0012 0.0012 cET: -- S- U- -A SA UA T1: 0.0272 0.0264 0.0267 0.0268 0.0261 0.0265 T2: 0.0280 0.0274 0.0273 0.0273 0.0276 0.0275 @@ -367,19 +367,19 @@ especially if few elements are of interest or the target element tag name is known, lxml is a good choice:: - lxe: getiterator_all (--TR T1) 6.0360 msec/pass + lxe: getiterator_all (--TR T1) 5.8582 msec/pass cET: getiterator_all (--TR T1) 39.9489 msec/pass ET : getiterator_all (--TR T1) 23.0000 msec/pass - lxe: getiterator_islice (--TR T2) 0.0851 msec/pass + lxe: getiterator_islice (--TR T2) 0.0780 msec/pass cET: getiterator_islice (--TR T2) 0.3440 msec/pass ET : getiterator_islice (--TR T2) 0.2429 msec/pass - lxe: getiterator_tag (--TR T2) 0.3290 msec/pass + lxe: getiterator_tag (--TR T2) 0.3119 
msec/pass cET: getiterator_tag (--TR T2) 14.1001 msec/pass ET : getiterator_tag (--TR T2) 7.4241 msec/pass - lxe: getiterator_tag_all (--TR T2) 0.7281 msec/pass + lxe: getiterator_tag_all (--TR T2) 0.6540 msec/pass cET: getiterator_tag_all (--TR T2) 40.7901 msec/pass ET : getiterator_tag_all (--TR T2) 21.0390 msec/pass Modified: lxml/trunk/src/lxml/classlookup.pxi ============================================================================== --- lxml/trunk/src/lxml/classlookup.pxi (original) +++ lxml/trunk/src/lxml/classlookup.pxi Thu Dec 20 18:32:36 2007 @@ -102,49 +102,50 @@ def __init__(self, element=None, comment=None, pi=None, entity=None): self._lookup_function = _lookupDefaultElementClass if element is None: - self.element_class = None + self.element_class = _Element elif issubclass(element, ElementBase): self.element_class = element else: raise TypeError, "element class must be subclass of ElementBase" if comment is None: - self.comment_class = None + self.comment_class = _Comment elif issubclass(comment, CommentBase): self.comment_class = comment else: raise TypeError, "comment class must be subclass of CommentBase" - if pi is None: - self.pi_class = None - elif issubclass(pi, PIBase): - self.pi_class = pi - else: - raise TypeError, "PI class must be subclass of PIBase" - if entity is None: - self.entity_class = None + self.entity_class = _Entity elif issubclass(entity, EntityBase): self.entity_class = entity else: raise TypeError, "Entity class must be subclass of EntityBase" + if pi is None: + self.pi_class = None # special case, see below + elif issubclass(pi, PIBase): + self.pi_class = pi + else: + raise TypeError, "PI class must be subclass of PIBase" + cdef object _lookupDefaultElementClass(state, _Document _doc, xmlNode* c_node): "Trivial class lookup function that always returns the default class." 
if c_node.type == tree.XML_ELEMENT_NODE: if state is not None: - cls = (<ElementDefaultClassLookup>state).element_class - if cls is None: - return _Element + return (<ElementDefaultClassLookup>state).element_class else: - return cls + return _Element elif c_node.type == tree.XML_COMMENT_NODE: if state is not None: - cls = (<ElementDefaultClassLookup>state).comment_class - if cls is None: + return (<ElementDefaultClassLookup>state).comment_class + else: return _Comment + elif c_node.type == tree.XML_ENTITY_REF_NODE: + if state is not None: + return (<ElementDefaultClassLookup>state).entity_class else: - return cls + return _Entity elif c_node.type == tree.XML_PI_NODE: if state is not None: cls = (<ElementDefaultClassLookup>state).pi_class @@ -158,13 +159,6 @@ return _ProcessingInstruction else: return cls - elif c_node.type == tree.XML_ENTITY_REF_NODE: - if state is not None: - cls = (<ElementDefaultClassLookup>state).entity_class - if cls is None: - return _Entity - else: - return cls else: assert 0, "Unknown node type: %s" % c_node.type @@ -220,12 +214,10 @@ self._lookup_function = _parser_class_lookup cdef object _parser_class_lookup(state, _Document doc, xmlNode* c_node): - cdef FallbackElementClassLookup lookup - lookup = state if doc._parser._class_lookup is not None: return doc._parser._class_lookup._lookup_function( doc._parser._class_lookup, doc, c_node) - return lookup._callFallback(doc, c_node) + return (<FallbackElementClassLookup>state)._callFallback(doc, c_node) cdef class CustomElementClassLookup(FallbackElementClassLookup): From scoder at codespeak.net Thu Dec 20 18:48:07 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Thu, 20 Dec 2007 18:48:07 +0100 (CET) Subject: [Lxml-checkins] r49961 - in lxml/trunk: . 
doc Message-ID: <20071220174807.AD861169E8A@codespeak.net> Author: scoder Date: Thu Dec 20 18:48:07 2007 New Revision: 49961 Modified: lxml/trunk/ (props changed) lxml/trunk/doc/performance.txt Log: r3169 at delle: sbehnel | 2007-12-20 18:48:02 +0100 benchmark updates Modified: lxml/trunk/doc/performance.txt ============================================================================== --- lxml/trunk/doc/performance.txt (original) +++ lxml/trunk/doc/performance.txt Thu Dec 20 18:48:07 2007 @@ -240,11 +240,11 @@ This handicap is also visible when accessing single children:: - lxe: first_child (--TR T2) 0.3228 msec/pass + lxe: first_child (--TR T2) 0.2470 msec/pass cET: first_child (--TR T2) 0.2170 msec/pass ET : first_child (--TR T2) 0.9968 msec/pass - lxe: last_child (--TR T1) 0.3269 msec/pass + lxe: last_child (--TR T1) 0.2482 msec/pass cET: last_child (--TR T1) 0.2291 msec/pass ET : last_child (--TR T1) 0.9830 msec/pass @@ -253,11 +253,11 @@ The data structure used by libxml2 is a linked tree, and thus, a linked list of children:: - lxe: middle_child (--TR T1) 0.3638 msec/pass + lxe: middle_child (--TR T1) 0.2789 msec/pass cET: middle_child (--TR T1) 0.2229 msec/pass ET : middle_child (--TR T1) 1.0030 msec/pass - lxe: middle_child (--TR T2) 2.1780 msec/pass + lxe: middle_child (--TR T2) 1.9610 msec/pass cET: middle_child (--TR T2) 0.2229 msec/pass ET : middle_child (--TR T2) 0.9930 msec/pass @@ -277,11 +277,11 @@ are supposed to end up in, either as SubElements of an Element or using the explicit ``Element.makeelement()`` call:: - lxe: makeelement (--TC T2) 2.2941 msec/pass + lxe: makeelement (--TC T2) 2.2650 msec/pass cET: makeelement (--TC T2) 0.3211 msec/pass ET : makeelement (--TC T2) 1.6358 msec/pass - lxe: create_subelements (--TC T2) 2.1169 msec/pass + lxe: create_subelements (--TC T2) 1.9531 msec/pass cET: create_subelements (--TC T2) 0.2351 msec/pass ET : create_subelements (--TC T2) 3.2270 msec/pass @@ -323,7 +323,7 @@ or replacing the child slice by a 
newly created element:: - lxe: replace_children_element (--TC T1) 0.2520 msec/pass + lxe: replace_children_element (--TC T1) 0.2480 msec/pass cET: replace_children_element (--TC T1) 0.0238 msec/pass ET : replace_children_element (--TC T1) 0.1600 msec/pass From scoder at codespeak.net Thu Dec 20 19:25:38 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Thu, 20 Dec 2007 19:25:38 +0100 (CET) Subject: [Lxml-checkins] r49963 - in lxml/trunk: . doc Message-ID: <20071220182538.6563B169EA9@codespeak.net> Author: scoder Date: Thu Dec 20 19:25:36 2007 New Revision: 49963 Modified: lxml/trunk/ (props changed) lxml/trunk/doc/performance.txt Log: r3171 at delle: sbehnel | 2007-12-20 19:25:24 +0100 benchmark updates Modified: lxml/trunk/doc/performance.txt ============================================================================== --- lxml/trunk/doc/performance.txt (original) +++ lxml/trunk/doc/performance.txt Thu Dec 20 19:25:36 2007 @@ -385,19 +385,19 @@ This translates directly into similar timings for ``Element.findall()``:: - lxe: findall (--TR T2) 8.2440 msec/pass + lxe: findall (--TR T2) 8.1239 msec/pass cET: findall (--TR T2) 44.5340 msec/pass ET : findall (--TR T2) 27.1149 msec/pass - lxe: findall (--TR T3) 1.7269 msec/pass + lxe: findall (--TR T3) 1.6870 msec/pass cET: findall (--TR T3) 12.9611 msec/pass ET : findall (--TR T3) 8.6131 msec/pass - lxe: findall_tag (--TR T2) 0.8020 msec/pass + lxe: findall_tag (--TR T2) 0.7660 msec/pass cET: findall_tag (--TR T2) 40.6358 msec/pass ET : findall_tag (--TR T2) 21.4581 msec/pass - lxe: findall_tag (--TR T3) 0.2341 msec/pass + lxe: findall_tag (--TR T3) 0.2160 msec/pass cET: findall_tag (--TR T3) 9.6831 msec/pass ET : findall_tag (--TR T3) 5.2109 msec/pass From scoder at codespeak.net Thu Dec 20 19:35:43 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Thu, 20 Dec 2007 19:35:43 +0100 (CET) Subject: [Lxml-checkins] r49964 - lxml/trunk Message-ID: <20071220183543.7D4C6169EAD@codespeak.net> 
Author: scoder Date: Thu Dec 20 19:35:41 2007 New Revision: 49964 Modified: lxml/trunk/ (props changed) lxml/trunk/CHANGES.txt Log: r3173 at delle: sbehnel | 2007-12-20 19:33:03 +0100 changelog Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Thu Dec 20 19:35:41 2007 @@ -2,6 +2,21 @@ lxml changelog ============== +Under development +================= + +Features added +-------------- + +Bugs fixed +---------- + +Other changes +------------- + +* Minor performance tweaks + + 2.0alpha6 (2007-12-19) ====================== From scoder at codespeak.net Thu Dec 20 19:35:46 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Thu, 20 Dec 2007 19:35:46 +0100 (CET) Subject: [Lxml-checkins] r49965 - lxml/trunk Message-ID: <20071220183546.ABE17169EB0@codespeak.net> Author: scoder Date: Thu Dec 20 19:35:46 2007 New Revision: 49965 Modified: lxml/trunk/ (props changed) lxml/trunk/version.txt Log: r3174 at delle: sbehnel | 2007-12-20 19:35:38 +0100 next release will be beta1 Modified: lxml/trunk/version.txt ============================================================================== --- lxml/trunk/version.txt (original) +++ lxml/trunk/version.txt Thu Dec 20 19:35:46 2007 @@ -1 +1 @@ -2.0alpha6 +2.0beta1 From scoder at codespeak.net Tue Dec 25 18:06:56 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Tue, 25 Dec 2007 18:06:56 +0100 (CET) Subject: [Lxml-checkins] r50103 - in lxml/trunk: . 
doc Message-ID: <20071225170656.5525F1684DA@codespeak.net> Author: scoder Date: Tue Dec 25 18:06:55 2007 New Revision: 50103 Modified: lxml/trunk/ (props changed) lxml/trunk/doc/lxml2.txt Log: r3177 at delle: sbehnel | 2007-12-22 16:10:24 +0100 doc update Modified: lxml/trunk/doc/lxml2.txt ============================================================================== --- lxml/trunk/doc/lxml2.txt (original) +++ lxml/trunk/doc/lxml2.txt Tue Dec 25 18:06:55 2007 @@ -47,6 +47,18 @@ .. _`namespace implementation`: element_classes.html#implementing-namespaces +* Some API functions now require passing options as keyword arguments, + as opposed to positional arguments. This restriction was introduced + to make the API usage independent of future extensions such as the + addition of new positional arguments. Users should not rely on the + position of an optional argument in function signatures and instead + pass it explicitly named. This also improves code readability - it + is common good practice to pass options in a consistent way + independent of their position, so many people may not even notice + the change in their code. Another important reason is compatibility + with cElementTree, which also enforces keyword-only arguments in a + couple of places. + * XPath now raises exceptions specific to the part of the execution that failed: ``XPathSyntaxError`` for parser errors and ``XPathEvalError`` for errors that occurred during the evaluation. Note that the distinction only From scoder at codespeak.net Tue Dec 25 18:06:59 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Tue, 25 Dec 2007 18:06:59 +0100 (CET) Subject: [Lxml-checkins] r50104 - in lxml/trunk: . 
src/lxml Message-ID: <20071225170659.1BAB11684E0@codespeak.net> Author: scoder Date: Tue Dec 25 18:06:59 2007 New Revision: 50104 Modified: lxml/trunk/ (props changed) lxml/trunk/src/lxml/classlookup.pxi Log: r3178 at delle: sbehnel | 2007-12-25 14:57:47 +0100 moved code out of element instantiation fast path Modified: lxml/trunk/src/lxml/classlookup.pxi ============================================================================== --- lxml/trunk/src/lxml/classlookup.pxi (original) +++ lxml/trunk/src/lxml/classlookup.pxi Tue Dec 25 18:06:59 2007 @@ -78,10 +78,10 @@ """ self.fallback = lookup self._fallback_function = lookup._lookup_function - - cdef object _callFallback(self, doc, xmlNode* c_node): if self._fallback_function is NULL: self._fallback_function = _lookupDefaultElementClass + + cdef object _callFallback(self, _Document doc, xmlNode* c_node): return self._fallback_function(self.fallback, doc, c_node) From scoder at codespeak.net Tue Dec 25 18:07:04 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Tue, 25 Dec 2007 18:07:04 +0100 (CET) Subject: [Lxml-checkins] r50105 - in lxml/trunk: . 
src/lxml Message-ID: <20071225170704.9B63B1684E8@codespeak.net> Author: scoder Date: Tue Dec 25 18:07:04 2007 New Revision: 50105 Modified: lxml/trunk/ (props changed) lxml/trunk/CHANGES.txt lxml/trunk/src/lxml/apihelpers.pxi Log: r3179 at delle: sbehnel | 2007-12-25 15:06:26 +0100 cleanup and optimisations in _makeSubElement() Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Tue Dec 25 18:07:04 2007 @@ -14,7 +14,8 @@ Other changes ------------- -* Minor performance tweaks +* Minor performance tweaks for Element instantiation and subelement + creation 2.0alpha6 (2007-12-19) Modified: lxml/trunk/src/lxml/apihelpers.pxi ============================================================================== --- lxml/trunk/src/lxml/apihelpers.pxi (original) +++ lxml/trunk/src/lxml/apihelpers.pxi Tue Dec 25 18:07:04 2007 @@ -138,30 +138,15 @@ attrib, nsmap, extra_attrs): """Create a new child element and initialize text content, namespaces and attributes. - - This helper function will reuse as much of the existing document as - possible: - - If 'parser' is None, the parser will be inherited from 'doc' or the - default parser will be used. - - If 'doc' is None, 'c_doc' is used to create a new _Document and the new - element is made its root node. - - If 'c_doc' is also NULL, a new xmlDoc will be created. 
""" - cdef _BaseParser parser - cdef _Document doc cdef xmlNode* c_node cdef xmlDoc* c_doc if parent is None or parent._doc is None: return None ns_utf, name_utf = _getNsTag(tag) - doc = parent._doc - c_doc = doc._c_doc + c_doc = parent._doc._c_doc - parser = doc._parser - if parser is not None and parser._for_html: + if parent._doc._parser is not None and parent._doc._parser._for_html: _htmlTagValidOrRaise(name_utf) else: _tagValidOrRaise(name_utf) @@ -171,24 +156,15 @@ python.PyErr_NoMemory() tree.xmlAddChild(parent._c_node, c_node) - try: - if text is not None: - _setNodeText(c_node, text) - if tail is not None: - _setTailText(c_node, tail) - - # add namespaces to node if necessary - doc._setNodeNamespaces(c_node, ns_utf, nsmap) - _initNodeAttributes(c_node, doc, attrib, extra_attrs) - return _elementFactory(doc, c_node) - except: - # free allocated c_node/c_doc unless Python does it for us - if c_node.doc is not c_doc: - # node not yet in document => will not be freed by document - if tail is not None: - _removeText(c_node.next) # tail - tree.xmlFreeNode(c_node) - raise + if text is not None: + _setNodeText(c_node, text) + if tail is not None: + _setTailText(c_node, tail) + + # add namespaces to node if necessary + parent._doc._setNodeNamespaces(c_node, ns_utf, nsmap) + _initNodeAttributes(c_node, parent._doc, attrib, extra_attrs) + return _elementFactory(parent._doc, c_node) cdef _initNodeAttributes(xmlNode* c_node, _Document doc, attrib, extra): """Initialise the attributes of an element node. From scoder at codespeak.net Tue Dec 25 18:07:07 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Tue, 25 Dec 2007 18:07:07 +0100 (CET) Subject: [Lxml-checkins] r50106 - in lxml/trunk: . 
src/lxml Message-ID: <20071225170707.029A21684E8@codespeak.net> Author: scoder Date: Tue Dec 25 18:07:07 2007 New Revision: 50106 Modified: lxml/trunk/ (props changed) lxml/trunk/src/lxml/_elementpath.py Log: r3180 at delle: sbehnel | 2007-12-25 15:54:59 +0100 typo Modified: lxml/trunk/src/lxml/_elementpath.py ============================================================================== --- lxml/trunk/src/lxml/_elementpath.py (original) +++ lxml/trunk/src/lxml/_elementpath.py Tue Dec 25 18:07:07 2007 @@ -125,7 +125,7 @@ if value[:1] == "'" or value[:1] == '"': value = value[1:-1] else: - raise SyntaxError("invalid comparision target") + raise SyntaxError("invalid comparison target") token = next() def select(result): for elem in result: From scoder at codespeak.net Tue Dec 25 18:07:11 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Tue, 25 Dec 2007 18:07:11 +0100 (CET) Subject: [Lxml-checkins] r50107 - in lxml/trunk: . src/lxml Message-ID: <20071225170711.D5AA91684ED@codespeak.net> Author: scoder Date: Tue Dec 25 18:07:11 2007 New Revision: 50107 Modified: lxml/trunk/ (props changed) lxml/trunk/src/lxml/lxml.etree.pyx Log: r3181 at delle: sbehnel | 2007-12-25 16:02:46 +0100 cleanup Modified: lxml/trunk/src/lxml/lxml.etree.pyx ============================================================================== --- lxml/trunk/src/lxml/lxml.etree.pyx (original) +++ lxml/trunk/src/lxml/lxml.etree.pyx Tue Dec 25 18:07:11 2007 @@ -1965,7 +1965,7 @@ (self._node_type != node._c_node.type or not _tagMatches(node._c_node, self._href, self._name)): # this cannot raise StopIteration, self._next_node != None - self.next() + self.__next__() def __iter__(self): return self From scoder at codespeak.net Fri Dec 28 13:25:27 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 28 Dec 2007 13:25:27 +0100 (CET) Subject: [Lxml-checkins] r50159 - in lxml/trunk: . 
src/lxml src/lxml/tests Message-ID: <20071228122527.8FE52168480@codespeak.net> Author: scoder Date: Fri Dec 28 13:25:27 2007 New Revision: 50159 Modified: lxml/trunk/ (props changed) lxml/trunk/CHANGES.txt lxml/trunk/src/lxml/lxml.etree.pyx lxml/trunk/src/lxml/tests/test_etree.py Log: r3187 at delle: sbehnel | 2007-12-28 13:23:37 +0100 make 'entity.text' return the textual representation, such as &eacute; Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Fri Dec 28 13:25:27 2007 @@ -8,6 +8,9 @@ Features added -------------- +* ``entity.text`` now returns the textual representation of the + entity, e.g. ``&amp;``. + Bugs fixed ---------- Modified: lxml/trunk/src/lxml/lxml.etree.pyx ============================================================================== --- lxml/trunk/src/lxml/lxml.etree.pyx (original) +++ lxml/trunk/src/lxml/lxml.etree.pyx Fri Dec 28 13:25:27 2007 @@ -1352,9 +1352,17 @@ def __set__(self, value): value = _utf8(value) + assert '&' not in value and ';' not in value, \ + "Invalid entity name '%s'" % value c_text = _cstr(value) tree.xmlNodeSetName(self._c_node, c_text) + property text: + # FIXME: should this be None or '&[VALUE];' or the resolved + # entity value ? + def __get__(self): + return '&%s;' % funicode(self._c_node.name) + def __repr__(self): return "&%s;" % self.name @@ -1940,10 +1948,10 @@ first pre-order). Note that this also includes comments, entities and processing instructions. To filter them out, check if the ``tag`` property of the returned element is a string (i.e. not None and not a - factory function). + factory function), or pass the ``Element`` factory for the ``tag`` keyword. - If the optional 'tag' argument is not None, the iterator returns only the - elements that match the respective name and namespace. 
+ If the optional ``tag`` argument is not None, the iterator returns only + the elements that match the respective name and namespace. The optional boolean argument 'inclusive' defaults to True and can be set to False to exclude the start element itself. Modified: lxml/trunk/src/lxml/tests/test_etree.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_etree.py (original) +++ lxml/trunk/src/lxml/tests/test_etree.py Fri Dec 28 13:25:27 2007 @@ -582,7 +582,7 @@ tree = parse(StringIO(xml), parser) root = tree.getroot() self.assertEquals(root[0].tag, Entity) - self.assertFalse(root[0].text) + self.assertEquals(root[0].text, "&myentity;") self.assertEquals(root[0].tail, None) self.assertEquals(root[0].name, "myentity") @@ -598,7 +598,7 @@ root.append( Entity("test") ) self.assertEquals(root[0].tag, Entity) - self.assertFalse(root[0].text) + self.assertEquals(root[0].text, "&test;") self.assertEquals(root[0].tail, None) self.assertEquals(root[0].name, "test") From scoder at codespeak.net Fri Dec 28 13:25:31 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 28 Dec 2007 13:25:31 +0100 (CET) Subject: [Lxml-checkins] r50160 - in lxml/trunk: . doc Message-ID: <20071228122531.0C743168487@codespeak.net> Author: scoder Date: Fri Dec 28 13:25:30 2007 New Revision: 50160 Modified: lxml/trunk/ (props changed) lxml/trunk/doc/tutorial.txt Log: r3188 at delle: sbehnel | 2007-12-28 13:25:12 +0100 tutorial: show how to restrict iteration to Element objects Modified: lxml/trunk/doc/tutorial.txt ============================================================================== --- lxml/trunk/doc/tutorial.txt (original) +++ lxml/trunk/doc/tutorial.txt Fri Dec 28 13:25:30 2007 @@ -328,6 +328,37 @@ child - Child 1 child - Child 2 +By default, iteration yields all nodes in the tree, including +ProcessingInstructions, Comments and Entity instances. 
If you want to +make sure only Element objects are returned, you can pass the +``Element`` factory as tag parameter:: + + >>> root.append(etree.Entity("#234")) + >>> root.append(etree.Comment("some comment")) + + >>> for element in root.iter(): + ... if isinstance(element.tag, basestring): + ... print element.tag, '-', element.text + ... else: + ... print 'SPECIAL:', element, '-', element.text + root - None + child - Child 1 + child - Child 2 + another - Child 3 + SPECIAL: &#234; - &#234; + SPECIAL: <!--some comment--> - some comment + + >>> for element in root.iter(tag=etree.Element): + ... print element.tag, '-', element.text + root - None + child - Child 1 + child - Child 2 + another - Child 3 + + >>> for element in root.iter(tag=etree.Entity): + ... print element.text + &#234; + In lxml.etree, elements provide `further iterators`_ for all directions in the tree: children, parents (or rather ancestors) and siblings. From scoder at codespeak.net Fri Dec 28 18:50:35 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 28 Dec 2007 18:50:35 +0100 (CET) Subject: [Lxml-checkins] r50162 - in lxml/trunk: .
src/lxml/tests Message-ID: <20071228175035.52CA41684F6@codespeak.net> Author: scoder Date: Fri Dec 28 18:50:33 2007 New Revision: 50162 Modified: lxml/trunk/ (props changed) lxml/trunk/src/lxml/tests/common_imports.py Log: r3191 at delle: sbehnel | 2007-12-28 18:50:25 +0100 skip (c)ET compatibility tests for older library versions Modified: lxml/trunk/src/lxml/tests/common_imports.py ============================================================================== --- lxml/trunk/src/lxml/tests/common_imports.py (original) +++ lxml/trunk/src/lxml/tests/common_imports.py Fri Dec 28 18:50:33 2007 @@ -13,6 +13,11 @@ except ImportError: ElementTree = None +if hasattr(ElementTree, 'VERSION'): + if tuple(ElementTree.VERSION.split('.')) < (1,3): + # compatibility tests require ET 1.3+ + ElementTree = None + try: import cElementTree # standard ET except ImportError: @@ -21,6 +26,11 @@ except ImportError: cElementTree = None +if hasattr(cElementTree, 'VERSION'): + if tuple(cElementTree.VERSION.split('.')) < (1,0,7): + # compatibility tests require cET 1.0.7+ + cElementTree = None + try: import doctest # check if the system version has everything we need From scoder at codespeak.net Sat Dec 29 17:14:32 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sat, 29 Dec 2007 17:14:32 +0100 (CET) Subject: [Lxml-checkins] r50172 - in lxml/trunk: . 
src/lxml Message-ID: <20071229161432.88C5E1684CC@codespeak.net> Author: scoder Date: Sat Dec 29 17:14:31 2007 New Revision: 50172 Modified: lxml/trunk/ (props changed) lxml/trunk/src/lxml/xslt.pxi Log: r3193 at delle: sbehnel | 2007-12-29 15:39:03 +0100 cleanup, doc Modified: lxml/trunk/src/lxml/xslt.pxi ============================================================================== --- lxml/trunk/src/lxml/xslt.pxi (original) +++ lxml/trunk/src/lxml/xslt.pxi Sat Dec 29 17:14:31 2007 @@ -343,7 +343,7 @@ def __copy__(self): cdef XSLT new_xslt cdef xmlDoc* c_doc - new_xslt = NEW_XSLT(XSLT) + new_xslt = NEW_XSLT(XSLT) # without calling __init__() new_xslt._access_control = self._access_control new_xslt._error_log = _ErrorLog() new_xslt._context = self._context._copy() @@ -399,7 +399,7 @@ transform_ctxt._private = resolver_context c_result = self._run_transform( - input_doc, c_doc, _kw, context, transform_ctxt) + c_doc, _kw, context, transform_ctxt) if transform_ctxt.profile: c_profile_doc = xslt.xsltGetProfileInformation(transform_ctxt) @@ -438,7 +438,7 @@ result_doc = _documentFactory(c_result, input_doc._parser) return _xsltResultTreeFactory(result_doc, self, profile_doc) - cdef xmlDoc* _run_transform(self, _Document input_doc, xmlDoc* c_input_doc, + cdef xmlDoc* _run_transform(self, xmlDoc* c_input_doc, parameters, _XSLTContext context, xslt.xsltTransformContext* transform_ctxt): cdef xmlDoc* c_result From scoder at codespeak.net Sat Dec 29 17:14:36 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sat, 29 Dec 2007 17:14:36 +0100 (CET) Subject: [Lxml-checkins] r50173 - in lxml/trunk: . 
src/lxml Message-ID: <20071229161436.7ED4C1684D9@codespeak.net> Author: scoder Date: Sat Dec 29 17:14:35 2007 New Revision: 50173 Modified: lxml/trunk/ (props changed) lxml/trunk/src/lxml/etree_defs.h Log: r3194 at delle: sbehnel | 2007-12-29 16:52:23 +0100 make explicit that tuple creation in PY_NEW is an unlikely case Modified: lxml/trunk/src/lxml/etree_defs.h ============================================================================== --- lxml/trunk/src/lxml/etree_defs.h (original) +++ lxml/trunk/src/lxml/etree_defs.h Sat Dec 29 17:14:35 2007 @@ -101,12 +101,23 @@ #define _cstr(s) PyString_AS_STRING(s) #define _fqtypename(o) (((PyTypeObject*)o)->ob_type->tp_name) +#ifdef __GNUC__ +/* Test for GCC > 2.95 */ +#if __GNUC__ > 2 || (__GNUC__ == 2 && (__GNUC_MINOR__ > 95)) +#define unlikely_condition(x) __builtin_expect((x), 0) +#else /* __GNUC__ > 2 ... */ +#define unlikely_condition(x) (x) +#endif /* __GNUC__ > 2 ... */ +#else /* __GNUC__ */ +#define unlikely_condition(x) (x) +#endif /* __GNUC__ */ + static PyObject* __PY_NEW_GLOBAL_EMPTY_TUPLE = NULL; #define PY_NEW(T) \ (((PyTypeObject*)(T))->tp_new( \ (PyTypeObject*)(T), \ - ((__PY_NEW_GLOBAL_EMPTY_TUPLE == NULL) ? \ + (unlikely_condition(__PY_NEW_GLOBAL_EMPTY_TUPLE == NULL) ? \ (__PY_NEW_GLOBAL_EMPTY_TUPLE = PyTuple_New(0)) : \ (__PY_NEW_GLOBAL_EMPTY_TUPLE)), \ NULL)) From scoder at codespeak.net Sat Dec 29 17:14:39 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sat, 29 Dec 2007 17:14:39 +0100 (CET) Subject: [Lxml-checkins] r50174 - in lxml/trunk: . 
benchmark Message-ID: <20071229161439.DEDBA1684FC@codespeak.net> Author: scoder Date: Sat Dec 29 17:14:39 2007 New Revision: 50174 Modified: lxml/trunk/ (props changed) lxml/trunk/benchmark/bench_etree.py Log: r3195 at delle: sbehnel | 2007-12-29 17:14:16 +0100 benchmark for result-free tree iteration Modified: lxml/trunk/benchmark/bench_etree.py ============================================================================== --- lxml/trunk/benchmark/bench_etree.py (original) +++ lxml/trunk/benchmark/bench_etree.py Sat Dec 29 17:14:39 2007 @@ -293,6 +293,9 @@ def bench_getiterator_tag_all(self, root): list(root.getiterator(self.SEARCH_TAG)) + def bench_getiterator_tag_none(self, root): + list(root.getiterator("{ThisShould}NeverExist")) + def bench_getiterator_tag_text(self, root): [ e.text for e in root.getiterator(self.SEARCH_TAG) ] From scoder at codespeak.net Sat Dec 29 20:10:39 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sat, 29 Dec 2007 20:10:39 +0100 (CET) Subject: [Lxml-checkins] r50179 - in lxml/trunk: . 
src/lxml Message-ID: <20071229191039.81EC3168550@codespeak.net> Author: scoder Date: Sat Dec 29 20:10:39 2007 New Revision: 50179 Modified: lxml/trunk/ (props changed) lxml/trunk/src/lxml/xslt.pxi Log: r3199 at delle: sbehnel | 2007-12-29 20:09:02 +0100 cleanup Modified: lxml/trunk/src/lxml/xslt.pxi ============================================================================== --- lxml/trunk/src/lxml/xslt.pxi (original) +++ lxml/trunk/src/lxml/xslt.pxi Sat Dec 29 20:10:39 2007 @@ -461,15 +461,15 @@ try: i = 0 keep_ref = [] - for key, value in parameters.iteritems(): + for key, value in parameters.items(): k = _utf8(key) python.PyList_Append(keep_ref, k) v = _utf8(value) python.PyList_Append(keep_ref, v) params[i] = _cstr(k) - i = i + 1 + i += 1 params[i] = _cstr(v) - i = i + 1 + i += 1 except: python.PyMem_Free(params) raise From scoder at codespeak.net Sat Dec 29 20:10:43 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sat, 29 Dec 2007 20:10:43 +0100 (CET) Subject: [Lxml-checkins] r50180 - in lxml/trunk: . src/lxml Message-ID: <20071229191043.2F095168551@codespeak.net> Author: scoder Date: Sat Dec 29 20:10:42 2007 New Revision: 50180 Modified: lxml/trunk/ (props changed) lxml/trunk/src/lxml/etree_defs.h Log: r3200 at delle: sbehnel | 2007-12-29 20:10:29 +0100 compilation without threading support adapted to code generated for 'with nogil' block Modified: lxml/trunk/src/lxml/etree_defs.h ============================================================================== --- lxml/trunk/src/lxml/etree_defs.h (original) +++ lxml/trunk/src/lxml/etree_defs.h Sat Dec 29 20:10:42 2007 @@ -28,6 +28,10 @@ # define PyEval_RestoreThread(state) # define PyGILState_Ensure() (PyGILState_UNLOCKED) # define PyGILState_Release(state) +# undef Py_UNBLOCK_THREADS +# define Py_UNBLOCK_THREADS +# undef Py_BLOCK_THREADS +# define Py_BLOCK_THREADS #endif #ifdef WITHOUT_THREADING