From scoder at codespeak.net Mon Oct 9 11:04:30 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Mon, 9 Oct 2006 11:04:30 +0200 (CEST) Subject: [Lxml-checkins] r33036 - lxml/pyrex/dist Message-ID: <20061009090430.F3D7310079@code0.codespeak.net> Author: scoder Date: Mon Oct 9 11:04:29 2006 New Revision: 33036 Removed: lxml/pyrex/dist/Pyrex-0.9.3.1-1.noarch.rpm lxml/pyrex/dist/Pyrex-0.9.3.1-1.src.rpm lxml/pyrex/dist/Pyrex-0.9.3.1.tar.gz Log: outdated and removed Deleted: /lxml/pyrex/dist/Pyrex-0.9.3.1-1.noarch.rpm ============================================================================== Binary file. No diff available. Deleted: /lxml/pyrex/dist/Pyrex-0.9.3.1-1.src.rpm ============================================================================== Binary file. No diff available. Deleted: /lxml/pyrex/dist/Pyrex-0.9.3.1.tar.gz ============================================================================== Binary file. No diff available. From scoder at codespeak.net Mon Oct 9 11:04:07 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Mon, 9 Oct 2006 11:04:07 +0200 (CEST) Subject: [Lxml-checkins] r33035 - lxml/pyrex/Pyrex/Compiler Message-ID: <20061009090407.E496710078@code0.codespeak.net> Author: scoder Date: Mon Oct 9 11:04:05 2006 New Revision: 33035 Modified: lxml/pyrex/Pyrex/Compiler/Nodes.py Log: small fix for C++ Modified: lxml/pyrex/Pyrex/Compiler/Nodes.py ============================================================================== --- lxml/pyrex/Pyrex/Compiler/Nodes.py (original) +++ lxml/pyrex/Pyrex/Compiler/Nodes.py Mon Oct 9 11:04:05 2006 @@ -3782,7 +3782,7 @@ /* Normalize to raise <class>, <instance> */ Py_DECREF(value); value = type; - type = type->ob_type; + type = (PyObject*) type->ob_type; Py_INCREF(type); } } From scoder at codespeak.net Fri Oct 13 17:38:50 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 13 Oct 2006 17:38:50 +0200 (CEST) Subject: [Lxml-checkins] r33264 - in lxml/trunk: . 
src/lxml Message-ID: <20061013153850.89DD3100EB@code0.codespeak.net> Author: scoder Date: Fri Oct 13 17:38:48 2006 New Revision: 33264 Modified: lxml/trunk/CHANGES.txt lxml/trunk/src/lxml/proxy.pxi Log: memory leak when garbage collecting tailed root elements Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Fri Oct 13 17:38:48 2006 @@ -15,6 +15,8 @@ Bugs fixed ---------- +* Memory leak when garbage collecting tailed root elements + * HTML script/style content was not propagated to .text * Show text xincluded between text nodes correctly in .text and .tail Modified: lxml/trunk/src/lxml/proxy.pxi ============================================================================== --- lxml/trunk/src/lxml/proxy.pxi (original) +++ lxml/trunk/src/lxml/proxy.pxi Fri Oct 13 17:38:48 2006 @@ -104,6 +104,7 @@ c_top = getDeallocationTop(c_node) if c_top is not NULL: #print "freeing:", c_top.name + _removeText(c_top.next) # tail tree.xmlFreeNode(c_top) cdef xmlNode* getDeallocationTop(xmlNode* c_node): From scoder at codespeak.net Fri Oct 13 17:48:35 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 13 Oct 2006 17:48:35 +0200 (CEST) Subject: [Lxml-checkins] r33265 - in lxml/trunk: . 
src/lxml Message-ID: <20061013154835.744FA100E9@code0.codespeak.net> Author: scoder Date: Fri Oct 13 17:48:32 2006 New Revision: 33265 Modified: lxml/trunk/CHANGES.txt lxml/trunk/src/lxml/xslt.pxi Log: memory leak for external URLs in _XSLTProcessingInstruction.parseXSL() Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Fri Oct 13 17:48:32 2006 @@ -15,6 +15,8 @@ Bugs fixed ---------- +* Memory leak for external URLs in _XSLTProcessingInstruction.parseXSL() + * Memory leak when garbage collecting tailed root elements * HTML script/style content was not propagated to .text Modified: lxml/trunk/src/lxml/xslt.pxi ============================================================================== --- lxml/trunk/src/lxml/xslt.pxi (original) +++ lxml/trunk/src/lxml/xslt.pxi Fri Oct 13 17:48:32 2006 @@ -582,9 +582,12 @@ c_href = tree.xmlBuildURI( c_href, tree.xmlNodeGetBase(self._c_node.doc, self._c_node)) - if c_href is NULL: - c_href = _cstr(href_utf) - result_doc = _parseDocument(funicode(c_href), parser) + if c_href is not NULL: + href = funicode(c_href) + tree.xmlFree(c_href) + else: + href = funicode(_cstr(href_utf)) + result_doc = _parseDocument(href, parser) return _elementTreeFactory(result_doc, None) # ID reference to embedded stylesheet From scoder at codespeak.net Fri Oct 13 17:51:47 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 13 Oct 2006 17:51:47 +0200 (CEST) Subject: [Lxml-checkins] r33266 - in lxml/trunk: . 
src/lxml Message-ID: <20061013155147.5B61F100E9@code0.codespeak.net> Author: scoder Date: Fri Oct 13 17:51:44 2006 New Revision: 33266 Modified: lxml/trunk/CHANGES.txt lxml/trunk/src/lxml/apihelpers.pxi lxml/trunk/src/lxml/etree.pyx lxml/trunk/src/lxml/etreepublic.pxd lxml/trunk/src/lxml/parser.pxi lxml/trunk/src/lxml/public-api.pxi Log: public C-API function makeElement() to create a new _Element with text, tail, attributes and namespaces Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Fri Oct 13 17:51:44 2006 @@ -8,6 +8,9 @@ Features added -------------- +* New C-API function makeElement() to create new elements with text, + tail, attributes and namespaces + * Reuse original parser flags for XInclude * Simplified support for handling XSLT processing instructions Modified: lxml/trunk/src/lxml/apihelpers.pxi ============================================================================== --- lxml/trunk/src/lxml/apihelpers.pxi (original) +++ lxml/trunk/src/lxml/apihelpers.pxi Fri Oct 13 17:51:44 2006 @@ -81,8 +81,10 @@ return None cdef _Element _makeElement(tag, xmlDoc* c_doc, _Document doc, - _BaseParser parser, attrib, nsmap, extra_attrs): - """Create a new element and initialize namespaces and attributes. + _BaseParser parser, text, tail, attrib, nsmap, + extra_attrs): + """Create a new element and initialize text content, namespaces and + attributes. 
This helper function will reuse as much of the existing document as possible: @@ -102,13 +104,52 @@ elif c_doc is NULL: c_doc = _newDoc() c_node = _createElement(c_doc, name_utf) - if doc is None: - tree.xmlDocSetRootElement(c_doc, c_node) - doc = _documentFactory(c_doc, parser) - # add namespaces to node if necessary - doc._setNodeNamespaces(c_node, ns_utf, nsmap) - _initNodeAttributes(c_node, doc, attrib, extra_attrs) - return _elementFactory(doc, c_node) + try: + if text is not None: + _setNodeText(c_node, text) + if tail is not None: + _setTailText(c_node, tail) + if doc is None: + tree.xmlDocSetRootElement(c_doc, c_node) + doc = _documentFactory(c_doc, parser) + # add namespaces to node if necessary + doc._setNodeNamespaces(c_node, ns_utf, nsmap) + _initNodeAttributes(c_node, doc, attrib, extra_attrs) + return _elementFactory(doc, c_node) + except: + # free allocated c_node/c_doc unless Python does it for us + if c_node.doc is not c_doc: + # node not yet in document => will not be freed by document + if tail is not None: + _removeText(c_node.next) # tail + tree.xmlFreeNode(c_node) + if doc is None: + # c_doc will not be freed by doc + tree.xmlFreeDoc(c_doc) + raise + +cdef _initNodeAttributes(xmlNode* c_node, _Document doc, attrib, extra): + """Initialise the attributes of an element node. 
+ """ + cdef xmlNs* c_ns + # 'extra' is not checked here (expected to be a keyword dict) + if attrib is not None and not hasattr(attrib, 'items'): + raise TypeError, "Invalid attribute dictionary: %s" % type(attrib) + if extra is not None and extra: + if attrib is None: + attrib = extra + else: + attrib.update(extra) + if attrib: + for name, value in attrib.items(): + attr_ns_utf, attr_name_utf = _getNsTag(name) + value_utf = _utf8(value) + if attr_ns_utf is None: + tree.xmlNewProp(c_node, _cstr(attr_name_utf), _cstr(value_utf)) + else: + c_ns = doc._findOrBuildNodeNs(c_node, _cstr(attr_ns_utf)) + tree.xmlNewNsProp(c_node, c_ns, + _cstr(attr_name_utf), _cstr(value_utf)) cdef object _attributeValue(xmlNode* c_element, xmlAttr* c_attrib_node): cdef char* value Modified: lxml/trunk/src/lxml/etree.pyx ============================================================================== --- lxml/trunk/src/lxml/etree.pyx (original) +++ lxml/trunk/src/lxml/etree.pyx Fri Oct 13 17:51:44 2006 @@ -1153,7 +1153,8 @@ def makeelement(self, _tag, attrib=None, nsmap=None, **_extra): """Creates a new element associated with the same document. """ - return _makeElement(_tag, NULL, self._doc, None, attrib, nsmap, _extra) + return _makeElement(_tag, NULL, self._doc, None, None, None, + attrib, nsmap, _extra) def find(self, path): """Finds the first matching subelement, by tag name or path. 
@@ -1565,35 +1566,15 @@ c_node = tree.xmlNewDocPI(c_doc, target, text) return c_node -cdef _initNodeAttributes(xmlNode* c_node, _Document doc, attrib, extra): - cdef xmlNs* c_ns - # 'extra' is not checked here (expected to be a keyword dict) - if attrib is not None and not hasattr(attrib, 'items'): - raise TypeError, "Invalid attribute dictionary: %s" % type(attrib) - if extra: - if attrib is None: - attrib = extra - else: - attrib.update(extra) - if attrib: - for name, value in attrib.items(): - attr_ns_utf, attr_name_utf = _getNsTag(name) - value_utf = _utf8(value) - if attr_ns_utf is None: - tree.xmlNewProp(c_node, _cstr(attr_name_utf), _cstr(value_utf)) - else: - c_ns = doc._findOrBuildNodeNs(c_node, _cstr(attr_ns_utf)) - tree.xmlNewNsProp(c_node, c_ns, - _cstr(attr_name_utf), _cstr(value_utf)) - - # module-level API for ElementTree def Element(_tag, attrib=None, nsmap=None, **_extra): - """Element factory. This function returns an object implementing the Element interface. + """Element factory. This function returns an object implementing the + Element interface. """ ### also look at _Element.makeelement() and _BaseParser.makeelement() ### - return _makeElement(_tag, NULL, None, None, attrib, nsmap, _extra) + return _makeElement(_tag, NULL, None, None, None, None, + attrib, nsmap, _extra) def Comment(text=None): """Comment element factory. 
This factory function creates a special element that will Modified: lxml/trunk/src/lxml/etreepublic.pxd ============================================================================== --- lxml/trunk/src/lxml/etreepublic.pxd (original) +++ lxml/trunk/src/lxml/etreepublic.pxd Fri Oct 13 17:51:44 2006 @@ -61,7 +61,12 @@ # create an ElementTree subclass for an Element cdef _ElementTree newElementTree(_NodeBase context_node, object subclass) - # deep copy a node to include in in the Document + # create a new Element for an existing or new document (doc = None) + # builds Python object after setting text, tail, namespaces and attributes + cdef _Element makeElement(tag, _Document doc, parser, + text, tail, attrib, nsmap) + + # deep copy a node to include it in the Document cdef _Element deepcopyNodeToDocument(_Document doc, tree.xmlNode* c_root) # set the internal lookup function for Element/Comment/PI classes Modified: lxml/trunk/src/lxml/parser.pxi ============================================================================== --- lxml/trunk/src/lxml/parser.pxi (original) +++ lxml/trunk/src/lxml/parser.pxi Fri Oct 13 17:51:44 2006 @@ -420,7 +420,8 @@ def makeelement(self, _tag, attrib=None, nsmap=None, **_extra): """Creates a new element associated with this parser. """ - return _makeElement(_tag, NULL, None, self, attrib, nsmap, _extra) + return _makeElement(_tag, NULL, None, self, None, None, + attrib, nsmap, _extra) cdef xmlDoc* _parseUnicodeDoc(self, utext, char* c_filename) except NULL: """Parse unicode document, share dictionary if possible. 
Modified: lxml/trunk/src/lxml/public-api.pxi ============================================================================== --- lxml/trunk/src/lxml/public-api.pxi (original) +++ lxml/trunk/src/lxml/public-api.pxi Fri Oct 13 17:51:44 2006 @@ -21,6 +21,10 @@ raise TypeError return _elementFactory(doc, c_node) +cdef public _Element makeElement(tag, _Document doc, parser, + text, tail, attrib, nsmap): + return _makeElement(tag, NULL, doc, parser, text, tail, attrib, nsmap, None) + cdef public void setElementClassLookupFunction( _element_class_lookup_function function, state): _setElementClassLookupFunction(function, state) From scoder at codespeak.net Fri Oct 13 17:54:14 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 13 Oct 2006 17:54:14 +0200 (CEST) Subject: [Lxml-checkins] r33267 - lxml/trunk/src/lxml Message-ID: <20061013155414.A8DAC100E9@code0.codespeak.net> Author: scoder Date: Fri Oct 13 17:54:13 2006 New Revision: 33267 Modified: lxml/trunk/src/lxml/objectify.pyx Log: use public makeElement() C function to create elements with text and tail in one shot Modified: lxml/trunk/src/lxml/objectify.pyx ============================================================================== --- lxml/trunk/src/lxml/objectify.pyx (original) +++ lxml/trunk/src/lxml/objectify.pyx Fri Oct 13 17:54:13 2006 @@ -1461,17 +1461,16 @@ Call without arguments to reset to the original parser. 
""" - global parser, _makeelement + global parser if new_parser is None: parser = __DEFAULT_PARSER elif isinstance(new_parser, etree.XMLParser): parser = new_parser else: raise TypeError, "parser must inherit from lxml.etree.XMLParser" - _makeelement = parser.makeelement -cdef object _makeelement -_makeelement = parser.makeelement +cdef _Element _makeElement(tag, text, attrib, nsmap): + return cetree.makeElement(tag, None, parser, text, None, attrib, nsmap) ################################################################################ # Module level factory functions @@ -1501,7 +1500,7 @@ if _pytype is None: _pytype = TREE_PYTYPE _attributes[PYTYPE_ATTRIBUTE] = _pytype - return _makeelement(_tag, _attributes, nsmap) + return _makeElement(_tag, None, _attributes, nsmap) def DataElement(_value, attrib=None, nsmap=None, _pytype=None, _xsi=None, **_attributes): @@ -1549,6 +1548,4 @@ if _pytype is not None: python.PyDict_SetItem(_attributes, PYTYPE_ATTRIBUTE, _pytype) - element = _makeelement("value", _attributes, nsmap) - cetree.setNodeText(element._c_node, strval) - return element + return _makeElement("value", strval, _attributes, nsmap) From scoder at codespeak.net Fri Oct 13 19:18:28 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 13 Oct 2006 19:18:28 +0200 (CEST) Subject: [Lxml-checkins] r33271 - in lxml/branch/lxml-1.1: . 
src/lxml Message-ID: <20061013171828.BA1CB100EC@code0.codespeak.net> Author: scoder Date: Fri Oct 13 19:18:26 2006 New Revision: 33271 Modified: lxml/branch/lxml-1.1/CHANGES.txt lxml/branch/lxml-1.1/src/lxml/apihelpers.pxi lxml/branch/lxml-1.1/src/lxml/etree.pyx lxml/branch/lxml-1.1/src/lxml/etreepublic.pxd lxml/branch/lxml-1.1/src/lxml/objectify.pyx lxml/branch/lxml-1.1/src/lxml/parser.pxi lxml/branch/lxml-1.1/src/lxml/proxy.pxi lxml/branch/lxml-1.1/src/lxml/public-api.pxi lxml/branch/lxml-1.1/src/lxml/xslt.pxi Log: merged in fixes from trunk Modified: lxml/branch/lxml-1.1/CHANGES.txt ============================================================================== --- lxml/branch/lxml-1.1/CHANGES.txt (original) +++ lxml/branch/lxml-1.1/CHANGES.txt Fri Oct 13 19:18:26 2006 @@ -8,6 +8,9 @@ Features added -------------- +* New C-API function makeElement() to create new elements with text, + tail, attributes and namespaces + * Reuse original parser flags for XInclude * Simplified support for handling XSLT processing instructions @@ -15,6 +18,10 @@ Bugs fixed ---------- +* Memory leak for external URLs in _XSLTProcessingInstruction.parseXSL() + +* Memory leak when garbage collecting tailed root elements + * HTML script/style content was not propagated to .text * Show text xincluded between text nodes correctly in .text and .tail Modified: lxml/branch/lxml-1.1/src/lxml/apihelpers.pxi ============================================================================== --- lxml/branch/lxml-1.1/src/lxml/apihelpers.pxi (original) +++ lxml/branch/lxml-1.1/src/lxml/apihelpers.pxi Fri Oct 13 19:18:26 2006 @@ -81,8 +81,10 @@ return None cdef _Element _makeElement(tag, xmlDoc* c_doc, _Document doc, - _BaseParser parser, attrib, nsmap, extra_attrs): - """Create a new element and initialize namespaces and attributes. + _BaseParser parser, text, tail, attrib, nsmap, + extra_attrs): + """Create a new element and initialize text content, namespaces and + attributes. 
This helper function will reuse as much of the existing document as possible: @@ -102,13 +104,52 @@ elif c_doc is NULL: c_doc = _newDoc() c_node = _createElement(c_doc, name_utf) - if doc is None: - tree.xmlDocSetRootElement(c_doc, c_node) - doc = _documentFactory(c_doc, parser) - # add namespaces to node if necessary - doc._setNodeNamespaces(c_node, ns_utf, nsmap) - _initNodeAttributes(c_node, doc, attrib, extra_attrs) - return _elementFactory(doc, c_node) + try: + if text is not None: + _setNodeText(c_node, text) + if tail is not None: + _setTailText(c_node, tail) + if doc is None: + tree.xmlDocSetRootElement(c_doc, c_node) + doc = _documentFactory(c_doc, parser) + # add namespaces to node if necessary + doc._setNodeNamespaces(c_node, ns_utf, nsmap) + _initNodeAttributes(c_node, doc, attrib, extra_attrs) + return _elementFactory(doc, c_node) + except: + # free allocated c_node/c_doc unless Python does it for us + if c_node.doc is not c_doc: + # node not yet in document => will not be freed by document + if tail is not None: + _removeText(c_node.next) # tail + tree.xmlFreeNode(c_node) + if doc is None: + # c_doc will not be freed by doc + tree.xmlFreeDoc(c_doc) + raise + +cdef _initNodeAttributes(xmlNode* c_node, _Document doc, attrib, extra): + """Initialise the attributes of an element node. 
+ """ + cdef xmlNs* c_ns + # 'extra' is not checked here (expected to be a keyword dict) + if attrib is not None and not hasattr(attrib, 'items'): + raise TypeError, "Invalid attribute dictionary: %s" % type(attrib) + if extra is not None and extra: + if attrib is None: + attrib = extra + else: + attrib.update(extra) + if attrib: + for name, value in attrib.items(): + attr_ns_utf, attr_name_utf = _getNsTag(name) + value_utf = _utf8(value) + if attr_ns_utf is None: + tree.xmlNewProp(c_node, _cstr(attr_name_utf), _cstr(value_utf)) + else: + c_ns = doc._findOrBuildNodeNs(c_node, _cstr(attr_ns_utf)) + tree.xmlNewNsProp(c_node, c_ns, + _cstr(attr_name_utf), _cstr(value_utf)) cdef object _attributeValue(xmlNode* c_element, xmlAttr* c_attrib_node): cdef char* value Modified: lxml/branch/lxml-1.1/src/lxml/etree.pyx ============================================================================== --- lxml/branch/lxml-1.1/src/lxml/etree.pyx (original) +++ lxml/branch/lxml-1.1/src/lxml/etree.pyx Fri Oct 13 19:18:26 2006 @@ -1152,7 +1152,8 @@ def makeelement(self, _tag, attrib=None, nsmap=None, **_extra): """Creates a new element associated with the same document. """ - return _makeElement(_tag, NULL, self._doc, None, attrib, nsmap, _extra) + return _makeElement(_tag, NULL, self._doc, None, None, None, + attrib, nsmap, _extra) def find(self, path): """Finds the first matching subelement, by tag name or path. 
@@ -1564,35 +1565,15 @@ c_node = tree.xmlNewDocPI(c_doc, target, text) return c_node -cdef _initNodeAttributes(xmlNode* c_node, _Document doc, attrib, extra): - cdef xmlNs* c_ns - # 'extra' is not checked here (expected to be a keyword dict) - if attrib is not None and not hasattr(attrib, 'items'): - raise TypeError, "Invalid attribute dictionary: %s" % type(attrib) - if extra: - if attrib is None: - attrib = extra - else: - attrib.update(extra) - if attrib: - for name, value in attrib.items(): - attr_ns_utf, attr_name_utf = _getNsTag(name) - value_utf = _utf8(value) - if attr_ns_utf is None: - tree.xmlNewProp(c_node, _cstr(attr_name_utf), _cstr(value_utf)) - else: - c_ns = doc._findOrBuildNodeNs(c_node, _cstr(attr_ns_utf)) - tree.xmlNewNsProp(c_node, c_ns, - _cstr(attr_name_utf), _cstr(value_utf)) - - # module-level API for ElementTree def Element(_tag, attrib=None, nsmap=None, **_extra): - """Element factory. This function returns an object implementing the Element interface. + """Element factory. This function returns an object implementing the + Element interface. """ ### also look at _Element.makeelement() and _BaseParser.makeelement() ### - return _makeElement(_tag, NULL, None, None, attrib, nsmap, _extra) + return _makeElement(_tag, NULL, None, None, None, None, + attrib, nsmap, _extra) def Comment(text=None): """Comment element factory. 
This factory function creates a special element that will Modified: lxml/branch/lxml-1.1/src/lxml/etreepublic.pxd ============================================================================== --- lxml/branch/lxml-1.1/src/lxml/etreepublic.pxd (original) +++ lxml/branch/lxml-1.1/src/lxml/etreepublic.pxd Fri Oct 13 19:18:26 2006 @@ -61,7 +61,12 @@ # create an ElementTree subclass for an Element cdef _ElementTree newElementTree(_NodeBase context_node, object subclass) - # deep copy a node to include in in the Document + # create a new Element for an existing or new document (doc = None) + # builds Python object after setting text, tail, namespaces and attributes + cdef _Element makeElement(tag, _Document doc, parser, + text, tail, attrib, nsmap) + + # deep copy a node to include it in the Document cdef _Element deepcopyNodeToDocument(_Document doc, tree.xmlNode* c_root) # set the internal lookup function for Element/Comment/PI classes Modified: lxml/branch/lxml-1.1/src/lxml/objectify.pyx ============================================================================== --- lxml/branch/lxml-1.1/src/lxml/objectify.pyx (original) +++ lxml/branch/lxml-1.1/src/lxml/objectify.pyx Fri Oct 13 19:18:26 2006 @@ -1461,17 +1461,16 @@ Call without arguments to reset to the original parser. 
""" - global parser, _makeelement + global parser if new_parser is None: parser = __DEFAULT_PARSER elif isinstance(new_parser, etree.XMLParser): parser = new_parser else: raise TypeError, "parser must inherit from lxml.etree.XMLParser" - _makeelement = parser.makeelement -cdef object _makeelement -_makeelement = parser.makeelement +cdef _Element _makeElement(tag, text, attrib, nsmap): + return cetree.makeElement(tag, None, parser, text, None, attrib, nsmap) ################################################################################ # Module level factory functions @@ -1501,7 +1500,7 @@ if _pytype is None: _pytype = TREE_PYTYPE _attributes[PYTYPE_ATTRIBUTE] = _pytype - return _makeelement(_tag, _attributes, nsmap) + return _makeElement(_tag, None, _attributes, nsmap) def DataElement(_value, attrib=None, nsmap=None, _pytype=None, _xsi=None, **_attributes): @@ -1549,6 +1548,4 @@ if _pytype is not None: python.PyDict_SetItem(_attributes, PYTYPE_ATTRIBUTE, _pytype) - element = _makeelement("value", _attributes, nsmap) - cetree.setNodeText(element._c_node, strval) - return element + return _makeElement("value", strval, _attributes, nsmap) Modified: lxml/branch/lxml-1.1/src/lxml/parser.pxi ============================================================================== --- lxml/branch/lxml-1.1/src/lxml/parser.pxi (original) +++ lxml/branch/lxml-1.1/src/lxml/parser.pxi Fri Oct 13 19:18:26 2006 @@ -420,7 +420,8 @@ def makeelement(self, _tag, attrib=None, nsmap=None, **_extra): """Creates a new element associated with this parser. """ - return _makeElement(_tag, NULL, None, self, attrib, nsmap, _extra) + return _makeElement(_tag, NULL, None, self, None, None, + attrib, nsmap, _extra) cdef xmlDoc* _parseUnicodeDoc(self, utext, char* c_filename) except NULL: """Parse unicode document, share dictionary if possible. 
Modified: lxml/branch/lxml-1.1/src/lxml/proxy.pxi ============================================================================== --- lxml/branch/lxml-1.1/src/lxml/proxy.pxi (original) +++ lxml/branch/lxml-1.1/src/lxml/proxy.pxi Fri Oct 13 19:18:26 2006 @@ -104,6 +104,7 @@ c_top = getDeallocationTop(c_node) if c_top is not NULL: #print "freeing:", c_top.name + _removeText(c_top.next) # tail tree.xmlFreeNode(c_top) cdef xmlNode* getDeallocationTop(xmlNode* c_node): Modified: lxml/branch/lxml-1.1/src/lxml/public-api.pxi ============================================================================== --- lxml/branch/lxml-1.1/src/lxml/public-api.pxi (original) +++ lxml/branch/lxml-1.1/src/lxml/public-api.pxi Fri Oct 13 19:18:26 2006 @@ -21,6 +21,10 @@ raise TypeError return _elementFactory(doc, c_node) +cdef public _Element makeElement(tag, _Document doc, parser, + text, tail, attrib, nsmap): + return _makeElement(tag, NULL, doc, parser, text, tail, attrib, nsmap, None) + cdef public void setElementClassLookupFunction( _element_class_lookup_function function, state): _setElementClassLookupFunction(function, state) Modified: lxml/branch/lxml-1.1/src/lxml/xslt.pxi ============================================================================== --- lxml/branch/lxml-1.1/src/lxml/xslt.pxi (original) +++ lxml/branch/lxml-1.1/src/lxml/xslt.pxi Fri Oct 13 19:18:26 2006 @@ -582,9 +582,12 @@ c_href = tree.xmlBuildURI( c_href, tree.xmlNodeGetBase(self._c_node.doc, self._c_node)) - if c_href is NULL: - c_href = _cstr(href_utf) - result_doc = _parseDocument(funicode(c_href), parser) + if c_href is not NULL: + href = funicode(c_href) + tree.xmlFree(c_href) + else: + href = funicode(_cstr(href_utf)) + result_doc = _parseDocument(href, parser) return _elementTreeFactory(result_doc, None) # ID reference to embedded stylesheet From scoder at codespeak.net Tue Oct 17 19:07:01 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Tue, 17 Oct 2006 19:07:01 +0200 (CEST) Subject: 
[Lxml-checkins] r33385 - in lxml/trunk: . src/lxml src/lxml/tests Message-ID: <20061017170701.9209A1007D@code0.codespeak.net> Author: scoder Date: Tue Oct 17 19:06:54 2006 New Revision: 33385 Modified: lxml/trunk/CHANGES.txt lxml/trunk/src/lxml/etree.pyx lxml/trunk/src/lxml/tests/test_elementtree.py lxml/trunk/src/lxml/tests/test_etree.py Log: fix: deep-copying Comments and PIs Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Tue Oct 17 19:06:54 2006 @@ -18,6 +18,8 @@ Bugs fixed ---------- +* Copying Comments and ProcessingInstructions failed + * Memory leak for external URLs in _XSLTProcessingInstruction.parseXSL() * Memory leak when garbage collecting tailed root elements Modified: lxml/trunk/src/lxml/etree.pyx ============================================================================== --- lxml/trunk/src/lxml/etree.pyx (original) +++ lxml/trunk/src/lxml/etree.pyx Tue Oct 17 19:06:54 2006 @@ -716,10 +716,20 @@ def __copy__(self): cdef xmlDoc* c_doc + cdef xmlNode* c_node cdef _Document new_doc c_doc = _copyDocRoot(self._doc._c_doc, self._c_node) # recursive new_doc = _documentFactory(c_doc, self._doc._parser) - return new_doc.getroot() + root = new_doc.getroot() + if root is not None: + return root + # Comment/PI + c_node = c_doc.children + while c_node is not NULL and c_node.type != self._c_node.type: + c_node = c_node.next + if c_node is NULL: + return None + return _elementFactory(new_doc, c_node) def set(self, key, value): """Sets an element attribute. 
Modified: lxml/trunk/src/lxml/tests/test_elementtree.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_elementtree.py (original) +++ lxml/trunk/src/lxml/tests/test_elementtree.py Tue Oct 17 19:06:54 2006 @@ -2242,6 +2242,17 @@ self.assertEquals('', tostring(b).replace(' ', '')) + def test_deepcopy_comment(self): + # previously caused a crash + Comment = self.etree.Comment + + a = Comment("ONE") + b = copy.deepcopy(a) + b.text = "ANOTHER" + + self.assertEquals('ONE', a.text) + self.assertEquals('ANOTHER', b.text) + def test_shallowcopy(self): Element = self.etree.Element Modified: lxml/trunk/src/lxml/tests/test_etree.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_etree.py (original) +++ lxml/trunk/src/lxml/tests/test_etree.py Tue Oct 17 19:06:54 2006 @@ -8,7 +8,7 @@ """ -import unittest, doctest +import unittest, doctest, copy from common_imports import etree, StringIO, HelperTestCase, fileInTestDir from common_imports import SillyFileLike, canonicalize @@ -82,6 +82,17 @@ self.assertEquals(root[0].target, "mypi") self.assertEquals(root[0].text, "my test ") + def test_deepcopy_pi(self): + # previously caused a crash + ProcessingInstruction = self.etree.ProcessingInstruction + + a = ProcessingInstruction("PI", "ONE") + b = copy.deepcopy(a) + b.text = "ANOTHER" + + self.assertEquals('ONE', a.text) + self.assertEquals('ANOTHER', b.text) + def test_attribute_set(self): # ElementTree accepts arbitrary attribute values # lxml.etree allows only strings From scoder at codespeak.net Tue Oct 17 19:08:08 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Tue, 17 Oct 2006 19:08:08 +0200 (CEST) Subject: [Lxml-checkins] r33386 - in lxml/branch/lxml-1.1: . 
src/lxml src/lxml/tests Message-ID: <20061017170808.253B01007D@code0.codespeak.net> Author: scoder Date: Tue Oct 17 19:08:06 2006 New Revision: 33386 Modified: lxml/branch/lxml-1.1/CHANGES.txt lxml/branch/lxml-1.1/src/lxml/etree.pyx lxml/branch/lxml-1.1/src/lxml/tests/test_elementtree.py lxml/branch/lxml-1.1/src/lxml/tests/test_etree.py Log: fix: deep-copying Comments and PIs Modified: lxml/branch/lxml-1.1/CHANGES.txt ============================================================================== --- lxml/branch/lxml-1.1/CHANGES.txt (original) +++ lxml/branch/lxml-1.1/CHANGES.txt Tue Oct 17 19:08:06 2006 @@ -18,6 +18,8 @@ Bugs fixed ---------- +* Copying Comments and ProcessingInstructions failed + * Memory leak for external URLs in _XSLTProcessingInstruction.parseXSL() * Memory leak when garbage collecting tailed root elements Modified: lxml/branch/lxml-1.1/src/lxml/etree.pyx ============================================================================== --- lxml/branch/lxml-1.1/src/lxml/etree.pyx (original) +++ lxml/branch/lxml-1.1/src/lxml/etree.pyx Tue Oct 17 19:08:06 2006 @@ -715,10 +715,20 @@ def __copy__(self): cdef xmlDoc* c_doc + cdef xmlNode* c_node cdef _Document new_doc c_doc = _copyDocRoot(self._doc._c_doc, self._c_node) # recursive new_doc = _documentFactory(c_doc, self._doc._parser) - return new_doc.getroot() + root = new_doc.getroot() + if root is not None: + return root + # Comment/PI + c_node = c_doc.children + while c_node is not NULL and c_node.type != self._c_node.type: + c_node = c_node.next + if c_node is NULL: + return None + return _elementFactory(new_doc, c_node) def set(self, key, value): """Sets an element attribute. 
Modified: lxml/branch/lxml-1.1/src/lxml/tests/test_elementtree.py ============================================================================== --- lxml/branch/lxml-1.1/src/lxml/tests/test_elementtree.py (original) +++ lxml/branch/lxml-1.1/src/lxml/tests/test_elementtree.py Tue Oct 17 19:08:06 2006 @@ -2242,6 +2242,17 @@ self.assertEquals('', tostring(b).replace(' ', '')) + def test_deepcopy_comment(self): + # previously caused a crash + Comment = self.etree.Comment + + a = Comment("ONE") + b = copy.deepcopy(a) + b.text = "ANOTHER" + + self.assertEquals('ONE', a.text) + self.assertEquals('ANOTHER', b.text) + def test_shallowcopy(self): Element = self.etree.Element Modified: lxml/branch/lxml-1.1/src/lxml/tests/test_etree.py ============================================================================== --- lxml/branch/lxml-1.1/src/lxml/tests/test_etree.py (original) +++ lxml/branch/lxml-1.1/src/lxml/tests/test_etree.py Tue Oct 17 19:08:06 2006 @@ -8,7 +8,7 @@ """ -import unittest, doctest +import unittest, doctest, copy from common_imports import etree, StringIO, HelperTestCase, fileInTestDir from common_imports import SillyFileLike, canonicalize @@ -82,6 +82,17 @@ self.assertEquals(root[0].target, "mypi") self.assertEquals(root[0].text, "my test ") + def test_deepcopy_pi(self): + # previously caused a crash + ProcessingInstruction = self.etree.ProcessingInstruction + + a = ProcessingInstruction("PI", "ONE") + b = copy.deepcopy(a) + b.text = "ANOTHER" + + self.assertEquals('ONE', a.text) + self.assertEquals('ANOTHER', b.text) + def test_attribute_set(self): # ElementTree accepts arbitrary attribute values # lxml.etree allows only strings From scoder at codespeak.net Thu Oct 19 09:36:08 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Thu, 19 Oct 2006 09:36:08 +0200 (CEST) Subject: [Lxml-checkins] r33442 - lxml/branch/lxml-1.1 Message-ID: <20061019073608.CDF7310063@code0.codespeak.net> Author: scoder Date: Thu Oct 19 09:36:07 2006 New Revision: 
33442 Modified: lxml/branch/lxml-1.1/setup.py Log: easy_install links to developer and bugfix versions Modified: lxml/branch/lxml-1.1/setup.py ============================================================================== --- lxml/branch/lxml-1.1/setup.py (original) +++ lxml/branch/lxml-1.1/setup.py Thu Oct 19 09:36:07 2006 @@ -65,6 +65,7 @@ src_dir = os.path.join(os.getcwd(), os.path.dirname(sys.argv[0])) version = open(os.path.join(src_dir, 'version.txt')).read().strip() +branch_version = version[:3] try: svn_entries = open(os.path.join(src_dir, '.svn', 'entries')).read() @@ -190,7 +191,18 @@ It extends the ElementTree API significantly to offer support for XPath, RelaxNG, XML Schema, XSLT, C14N and much more. -""" + changelog_text, +In case you want to use the current in-development version of lxml, you can +get it from the subversion repository at http://codespeak.net/svn/lxml/trunk . +Running ``easy_install lxml==dev`` will install it from +http://codespeak.net/svn/lxml/trunk#egg=lxml-dev + +Current bug fixes for the stable version are at +http://codespeak.net/svn/lxml/branch/lxml-%(branch_version)s . 
+Running ``easy_install lxml==lxml-%(branch_version)sbugfix`` will install this +version from +http://codespeak.net/svn/lxml/branch/lxml-%(branch_version)s#egg=lxml-%(branch_version)sbugfix + +""" % {"branch_version":branch_version} + changelog_text, classifiers = [ dev_status, From scoder at codespeak.net Thu Oct 19 09:37:04 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Thu, 19 Oct 2006 09:37:04 +0200 (CEST) Subject: [Lxml-checkins] r33443 - lxml/trunk Message-ID: <20061019073704.263B710063@code0.codespeak.net> Author: scoder Date: Thu Oct 19 09:37:01 2006 New Revision: 33443 Modified: lxml/trunk/setup.py Log: easy_install links to developer and bugfix versions Modified: lxml/trunk/setup.py ============================================================================== --- lxml/trunk/setup.py (original) +++ lxml/trunk/setup.py Thu Oct 19 09:37:01 2006 @@ -65,6 +65,7 @@ src_dir = os.path.join(os.getcwd(), os.path.dirname(sys.argv[0])) version = open(os.path.join(src_dir, 'version.txt')).read().strip() +branch_version = version[:3] try: svn_entries = open(os.path.join(src_dir, '.svn', 'entries')).read() @@ -190,7 +191,18 @@ It extends the ElementTree API significantly to offer support for XPath, RelaxNG, XML Schema, XSLT, C14N and much more. -""" + changelog_text, +In case you want to use the current in-development version of lxml, you can +get it from the subversion repository at http://codespeak.net/svn/lxml/trunk . +Running ``easy_install lxml==dev`` will install it from +http://codespeak.net/svn/lxml/trunk#egg=lxml-dev + +Current bug fixes for the stable version are at +http://codespeak.net/svn/lxml/branch/lxml-%(branch_version)s . 
+Running ``easy_install lxml==lxml-%(branch_version)sbugfix`` will install this +version from +http://codespeak.net/svn/lxml/branch/lxml-%(branch_version)s#egg=lxml-%(branch_version)sbugfix + +""" % {"branch_version":branch_version} + changelog_text, classifiers = [ dev_status, From scoder at codespeak.net Fri Oct 20 08:51:50 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 20 Oct 2006 08:51:50 +0200 (CEST) Subject: [Lxml-checkins] r33484 - lxml/trunk/doc Message-ID: <20061020065150.D9C8410060@code0.codespeak.net> Author: scoder Date: Fri Oct 20 08:51:48 2006 New Revision: 33484 Modified: lxml/trunk/doc/FAQ.txt lxml/trunk/doc/api.txt Log: notes on HTML parsing Modified: lxml/trunk/doc/FAQ.txt ============================================================================== --- lxml/trunk/doc/FAQ.txt (original) +++ lxml/trunk/doc/FAQ.txt Fri Oct 20 08:51:48 2006 @@ -240,10 +240,14 @@ does not. However, if the unicode string declares an XML encoding internally (````), parsing is bound to fail, as this encoding is most likely not the real encoding used in Python unicode. The same is true -for HTML unicode strings that contain charset meta tags. Note that Python -uses different encodings for unicode on different platforms, so even -specifying the real internal unicode encoding is not portable between Python -interpreters. Don't do it. +for HTML unicode strings that contain charset meta tags, although the problems +may be more subtle here. The libxml2 HTML parser may not be able to parse the +meta tags in broken HTML and simply ignore them, so even if parsing succeeds, +later handling may still fail with character encoding errors. + +Note that Python uses different encodings for unicode on different platforms, +so even specifying the real internal unicode encoding is not portable between +Python interpreters. Don't do it. Python unicode strings with XML data or HTML data that carry encoding information are broken. lxml will not parse them. 
You must provide parsable Modified: lxml/trunk/doc/api.txt ============================================================================== --- lxml/trunk/doc/api.txt (original) +++ lxml/trunk/doc/api.txt Fri Oct 20 08:51:48 2006 @@ -192,9 +192,8 @@ HTML parsing is similarly simple. The parsers have a ``recover`` keyword argument that the HTMLParser sets by default. It lets libxml2 try its best to -return something usable without raising an exception. Note that this -functionality depends entirely on libxml2. You should use libxml2 version -2.6.21 or newer to take advantage of this feature:: +return something usable without raising an exception. You should use libxml2 +version 2.6.21 or newer to take advantage of this feature:: >>> broken_html = "test<body><h1>page title</h3>" @@ -211,6 +210,14 @@ >>> print etree.tostring(html) <html><head><title>test

page title

+The support for parsing broken HTML depends entirely on libxml2's recovery +algorithm. It is *not* the fault of lxml if you find documents that are so +heavily broken that the parser cannot handle them. There is also no guarantee +that the resulting tree will contain all data from the original document. The +parser may have to drop seriously broken parts when struggling to keep +parsing. Especially misplaced meta tags can suffer from this, which may lead +to encoding problems. + The use of the libxml2 parsers makes some additional information available at the API level. Currently, ElementTree objects can access the DOCTYPE information provided by a parsed document, as well as the XML version and the From scoder at codespeak.net Fri Oct 20 08:52:35 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 20 Oct 2006 08:52:35 +0200 (CEST) Subject: [Lxml-checkins] r33485 - lxml/branch/lxml-1.1 Message-ID: <20061020065235.8E3A110060@code0.codespeak.net> Author: scoder Date: Fri Oct 20 08:52:34 2006 New Revision: 33485 Modified: lxml/branch/lxml-1.1/setup.py Log: URL fix Modified: lxml/branch/lxml-1.1/setup.py ============================================================================== --- lxml/branch/lxml-1.1/setup.py (original) +++ lxml/branch/lxml-1.1/setup.py Fri Oct 20 08:52:34 2006 @@ -198,7 +198,7 @@ Current bug fixes for the stable version are at http://codespeak.net/svn/lxml/branch/lxml-%(branch_version)s . 
-Running ``easy_install lxml==lxml-%(branch_version)sbugfix`` will install this +Running ``easy_install lxml==%(branch_version)sbugfix`` will install this version from http://codespeak.net/svn/lxml/branch/lxml-%(branch_version)s#egg=lxml-%(branch_version)sbugfix From scoder at codespeak.net Fri Oct 20 08:55:36 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 20 Oct 2006 08:55:36 +0200 (CEST) Subject: [Lxml-checkins] r33486 - lxml/trunk Message-ID: <20061020065536.8B71D10060@code0.codespeak.net> Author: scoder Date: Fri Oct 20 08:55:35 2006 New Revision: 33486 Modified: lxml/trunk/setup.py Log: URL fixes Modified: lxml/trunk/setup.py ============================================================================== --- lxml/trunk/setup.py (original) +++ lxml/trunk/setup.py Fri Oct 20 08:55:35 2006 @@ -198,7 +198,7 @@ Current bug fixes for the stable version are at http://codespeak.net/svn/lxml/branch/lxml-%(branch_version)s . -Running ``easy_install lxml==lxml-%(branch_version)sbugfix`` will install this +Running ``easy_install lxml==%(branch_version)sbugfix`` will install this version from http://codespeak.net/svn/lxml/branch/lxml-%(branch_version)s#egg=lxml-%(branch_version)sbugfix From scoder at codespeak.net Fri Oct 20 08:56:11 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 20 Oct 2006 08:56:11 +0200 (CEST) Subject: [Lxml-checkins] r33487 - lxml/branch/lxml-1.1/doc Message-ID: <20061020065611.53FBD10060@code0.codespeak.net> Author: scoder Date: Fri Oct 20 08:56:10 2006 New Revision: 33487 Modified: lxml/branch/lxml-1.1/doc/FAQ.txt lxml/branch/lxml-1.1/doc/api.txt Log: notes on HTML parsing Modified: lxml/branch/lxml-1.1/doc/FAQ.txt ============================================================================== --- lxml/branch/lxml-1.1/doc/FAQ.txt (original) +++ lxml/branch/lxml-1.1/doc/FAQ.txt Fri Oct 20 08:56:10 2006 @@ -240,10 +240,14 @@ does not. 
However, if the unicode string declares an XML encoding internally (````), parsing is bound to fail, as this encoding is most likely not the real encoding used in Python unicode. The same is true -for HTML unicode strings that contain charset meta tags. Note that Python -uses different encodings for unicode on different platforms, so even -specifying the real internal unicode encoding is not portable between Python -interpreters. Don't do it. +for HTML unicode strings that contain charset meta tags, although the problems +may be more subtle here. The libxml2 HTML parser may not be able to parse the +meta tags in broken HTML and simply ignore them, so even if parsing succeeds, +later handling may still fail with character encoding errors. + +Note that Python uses different encodings for unicode on different platforms, +so even specifying the real internal unicode encoding is not portable between +Python interpreters. Don't do it. Python unicode strings with XML data or HTML data that carry encoding information are broken. lxml will not parse them. You must provide parsable Modified: lxml/branch/lxml-1.1/doc/api.txt ============================================================================== --- lxml/branch/lxml-1.1/doc/api.txt (original) +++ lxml/branch/lxml-1.1/doc/api.txt Fri Oct 20 08:56:10 2006 @@ -192,9 +192,8 @@ HTML parsing is similarly simple. The parsers have a ``recover`` keyword argument that the HTMLParser sets by default. It lets libxml2 try its best to -return something usable without raising an exception. Note that this -functionality depends entirely on libxml2. You should use libxml2 version -2.6.21 or newer to take advantage of this feature:: +return something usable without raising an exception. You should use libxml2 +version 2.6.21 or newer to take advantage of this feature:: >>> broken_html = "test<body><h1>page title</h3>" @@ -211,6 +210,14 @@ >>> print etree.tostring(html) <html><head><title>test

page title

+The support for parsing broken HTML depends entirely on libxml2's recovery +algorithm. It is *not* the fault of lxml if you find documents that are so +heavily broken that the parser cannot handle them. There is also no guarantee +that the resulting tree will contain all data from the original document. The +parser may have to drop seriously broken parts when struggling to keep +parsing. Especially misplaced meta tags can suffer from this, which may lead +to encoding problems. + The use of the libxml2 parsers makes some additional information available at the API level. Currently, ElementTree objects can access the DOCTYPE information provided by a parsed document, as well as the XML version and the From scoder at codespeak.net Fri Oct 20 09:09:11 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 20 Oct 2006 09:09:11 +0200 (CEST) Subject: [Lxml-checkins] r33488 - lxml/trunk/doc Message-ID: <20061020070911.E6C4310068@code0.codespeak.net> Author: scoder Date: Fri Oct 20 09:09:10 2006 New Revision: 33488 Modified: lxml/trunk/doc/FAQ.txt Log: small FAQ fix Modified: lxml/trunk/doc/FAQ.txt ============================================================================== --- lxml/trunk/doc/FAQ.txt (original) +++ lxml/trunk/doc/FAQ.txt Fri Oct 20 09:09:10 2006 @@ -242,8 +242,8 @@ most likely not the real encoding used in Python unicode. The same is true for HTML unicode strings that contain charset meta tags, although the problems may be more subtle here. The libxml2 HTML parser may not be able to parse the -meta tags in broken HTML and simply ignore them, so even if parsing succeeds, -later handling may still fail with character encoding errors. +meta tags in broken HTML and may end up ignoring them, so even if parsing +succeeds, later handling may still fail with character encoding errors. 
Note that Python uses different encodings for unicode on different platforms, so even specifying the real internal unicode encoding is not portable between From scoder at codespeak.net Fri Oct 20 09:09:33 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 20 Oct 2006 09:09:33 +0200 (CEST) Subject: [Lxml-checkins] r33489 - lxml/branch/lxml-1.1/doc Message-ID: <20061020070933.E765910068@code0.codespeak.net> Author: scoder Date: Fri Oct 20 09:09:31 2006 New Revision: 33489 Modified: lxml/branch/lxml-1.1/doc/FAQ.txt Log: small FAQ fix Modified: lxml/branch/lxml-1.1/doc/FAQ.txt ============================================================================== --- lxml/branch/lxml-1.1/doc/FAQ.txt (original) +++ lxml/branch/lxml-1.1/doc/FAQ.txt Fri Oct 20 09:09:31 2006 @@ -242,8 +242,8 @@ most likely not the real encoding used in Python unicode. The same is true for HTML unicode strings that contain charset meta tags, although the problems may be more subtle here. The libxml2 HTML parser may not be able to parse the -meta tags in broken HTML and simply ignore them, so even if parsing succeeds, -later handling may still fail with character encoding errors. +meta tags in broken HTML and may end up ignoring them, so even if parsing +succeeds, later handling may still fail with character encoding errors. 
Note that Python uses different encodings for unicode on different platforms, so even specifying the real internal unicode encoding is not portable between From scoder at codespeak.net Sat Oct 21 22:13:09 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sat, 21 Oct 2006 22:13:09 +0200 (CEST) Subject: [Lxml-checkins] r33527 - lxml/trunk/src/lxml Message-ID: <20061021201309.861B710071@code0.codespeak.net> Author: scoder Date: Sat Oct 21 22:13:07 2006 New Revision: 33527 Modified: lxml/trunk/src/lxml/xmlid.pxi Log: refcount bug and code cleanup in XMLDTDID Modified: lxml/trunk/src/lxml/xmlid.pxi ============================================================================== --- lxml/trunk/src/lxml/xmlid.pxi (original) +++ lxml/trunk/src/lxml/xmlid.pxi Sat Oct 21 22:13:07 2006 @@ -113,12 +113,9 @@ return repr(dict(self)) def keys(self): - keys = self._keys - if keys is not None: - return python.PySequence_List(keys) - keys = self._build_keys() - self._keys = python.PySequence_Tuple(keys) - return keys + if self._keys is None: + self._keys = self._build_keys() + return self._keys[:] def __iter__(self): keys = self._keys @@ -142,12 +139,9 @@ return keys def items(self): - items = self._items - if items is not None: - return python.PySequence_List(items) - items = self._build_items() - self._items = python.PySequence_Tuple(items) - return items + if self._items is None: + self._items = self._build_items() + return self._items[:] def iteritems(self): items = self._items @@ -169,6 +163,7 @@ values = [] for item in items: value = python.PyTuple_GET_ITEM(item, 1) + python.Py_INCREF(value) python.PyList_Append(values, value) return values From scoder at codespeak.net Mon Oct 23 10:27:47 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Mon, 23 Oct 2006 10:27:47 +0200 (CEST) Subject: [Lxml-checkins] r33541 - lxml/trunk/doc Message-ID: <20061023082747.A1C1E1006E@code0.codespeak.net> Author: scoder Date: Mon Oct 23 10:27:46 2006 New Revision: 
33541 Modified: lxml/trunk/doc/build.txt Log: doc fix Modified: lxml/trunk/doc/build.txt ============================================================================== --- lxml/trunk/doc/build.txt (original) +++ lxml/trunk/doc/build.txt Mon Oct 23 10:27:46 2006 @@ -243,11 +243,11 @@ * check md5sum of created tar.gz file and place new sum and size in dsc file * do ``dpkg-source -x lxml-...dsc`` and cd into the newly created directory * run ``dch -i`` and add a comment like "use trunk version", this will - increase the debian version number so apt/dpkg don't get confused + increase the debian version number so apt/dpkg won't get confused * run ``dpkg-buildpackage -rfakeroot -us -uc`` to build the package -Eventually dpkg-buildpackage will tell you that some dependecies are missing, -you can either install them manually or run apt-get build-dep lxml +In case ``dpkg-buildpackage`` tells you that some dependencies are missing, you +can either install them manually or run ``apt-get build-dep lxml``. That will give you .deb packages in the parent directory which can be installed using ``dpkg -i``. From scoder at codespeak.net Mon Oct 23 10:28:11 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Mon, 23 Oct 2006 10:28:11 +0200 (CEST) Subject: [Lxml-checkins] r33542 - lxml/trunk/src/lxml Message-ID: <20061023082811.315D21006E@code0.codespeak.net> Author: scoder Date: Mon Oct 23 10:28:10 2006 New Revision: 33542 Modified: lxml/trunk/src/lxml/xmlid.pxi Log: more cleanup in xmlid.pyx Modified: lxml/trunk/src/lxml/xmlid.pxi ============================================================================== --- lxml/trunk/src/lxml/xmlid.pxi (original) +++ lxml/trunk/src/lxml/xmlid.pxi Mon Oct 23 10:28:10 2006 @@ -47,7 +47,7 @@ The dictionary must be instantiated with the root element of a parsed XML document, otherwise the behaviour is undefined. Elements and XML trees - that were created or modified through the API are not supported. 
+ that were created or modified 'by hand' are not supported. """ cdef _Document _doc cdef object _keys @@ -89,7 +89,7 @@ return c_id is not NULL def has_key(self, id_name): - return self.__contains__(id_name) + return id_name in self def __cmp__(self, other): if other is None: @@ -118,25 +118,17 @@ return self._keys[:] def __iter__(self): - keys = self._keys - if keys is None: - keys = self.keys() - return iter(keys) + if self._keys is None: + self._keys = self._build_keys() + return iter(self._keys) def iterkeys(self): - return self.__iter__() + return self def __len__(self): - keys = self._keys - if keys is None: - keys = self.keys() - return len(keys) - - cdef object _build_keys(self): - keys = [] - tree.xmlHashScan(self._doc._c_doc.ids, - _collectIdHashKeys, keys) - return keys + if self._keys is None: + self._keys = self._build_keys() + return len(self._keys) def items(self): if self._items is None: @@ -144,24 +136,15 @@ return self._items[:] def iteritems(self): - items = self._items - if items is None: - items = self.items() - return iter(items) - - cdef object _build_items(self): - items = [] - context = (items, self._doc) - tree.xmlHashScan(self._doc._c_doc.ids, - _collectIdHashItemList, context) - return items + if self._items is None: + self._items = self._build_items() + return iter(self._items) def values(self): - items = self._items - if items is None: - items = self.items() + if self._items is None: + self._items = self._build_items() values = [] - for item in items: + for item in self._items: value = python.PyTuple_GET_ITEM(item, 1) python.Py_INCREF(value) python.PyList_Append(values, value) @@ -170,6 +153,19 @@ def itervalues(self): return iter(self.values()) + cdef object _build_keys(self): + keys = [] + tree.xmlHashScan(self._doc._c_doc.ids, + _collectIdHashKeys, keys) + return keys + + cdef object _build_items(self): + items = [] + context = (items, self._doc) + tree.xmlHashScan(self._doc._c_doc.ids, + _collectIdHashItemList, context) + return 
items + cdef void _collectIdHashItemDict(void* payload, void* context, char* name): # collect elements from ID attribute hash table cdef tree.xmlID* c_id From scoder at codespeak.net Tue Oct 24 08:46:09 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Tue, 24 Oct 2006 08:46:09 +0200 (CEST) Subject: [Lxml-checkins] r33628 - lxml/trunk/src/lxml Message-ID: <20061024064609.4E64C10063@code0.codespeak.net> Author: scoder Date: Tue Oct 24 08:46:06 2006 New Revision: 33628 Modified: lxml/trunk/src/lxml/etree.pyx lxml/trunk/src/lxml/etreepublic.pxd lxml/trunk/src/lxml/objectify.pyx lxml/trunk/src/lxml/public-api.pxi Log: use iterator class for internal attribute iteration Modified: lxml/trunk/src/lxml/etree.pyx ============================================================================== --- lxml/trunk/src/lxml/etree.pyx (original) +++ lxml/trunk/src/lxml/etree.pyx Tue Oct 24 08:46:06 2006 @@ -24,6 +24,9 @@ cdef object super super = __builtin__.super +cdef object StopIteration +StopIteration = __builtin__.StopIteration + del __builtin__ cdef object _elementpath @@ -41,6 +44,9 @@ except ImportError: pass +cdef object ITER_EMPTY +ITER_EMPTY = iter(()) + # the rules # any libxml C argument/variable is prefixed with c_ # any non-public function/class is prefixed with an underscore @@ -1053,13 +1059,13 @@ """Gets a list of attribute names. The names are returned in an arbitrary order (just like for an ordinary Python dictionary). """ - return self.attrib.keys() + return python.PySequence_List( _attributeIteratorFactory(self, 1) ) def items(self): """Gets element attributes, as a sequence. The attributes are returned in an arbitrary order. """ - return self.attrib.items() + return python.PySequence_List( _attributeIteratorFactory(self, 3) ) def getchildren(self): """Returns all subelements. The elements are returned in document order. 
@@ -1339,17 +1345,8 @@ return _getAttributeValue(self._element, key, default) def keys(self): - cdef xmlNode* c_node - cdef xmlAttr* c_attr - c_node = self._element._c_node - c_attr = c_node.properties - result = [] - while c_attr is not NULL: - if c_attr.type == tree.XML_ATTRIBUTE_NODE: - python.PyList_Append( - result, _namespacedName(c_attr)) - c_attr = c_attr.next - return result + return python.PySequence_List( + _attributeIteratorFactory(self._element, 1) ) def __iter__(self): return iter(self.keys()) @@ -1358,35 +1355,15 @@ return iter(self.keys()) def values(self): - cdef xmlNode* c_node - cdef xmlAttr* c_attr - c_node = self._element._c_node - c_attr = c_node.properties - result = [] - while c_attr is not NULL: - if c_attr.type == tree.XML_ATTRIBUTE_NODE: - python.PyList_Append( - result, _attributeValue(c_node, c_attr)) - c_attr = c_attr.next - return result + return python.PySequence_List( + _attributeIteratorFactory(self._element, 2) ) def itervalues(self): return iter(self.values()) def items(self): - result = [] - cdef xmlNode* c_node - cdef xmlAttr* c_attr - c_node = self._element._c_node - c_attr = c_node.properties - while c_attr is not NULL: - if c_attr.type == tree.XML_ATTRIBUTE_NODE: - python.PyList_Append(result, ( - _namespacedName(c_attr), - _attributeValue(c_node, c_attr) - )) - c_attr = c_attr.next - return result + return python.PySequence_List( + _attributeIteratorFactory(self._element, 3) ) def iteritems(self): return iter(self.items()) @@ -1414,6 +1391,47 @@ tree.xmlFree(c_result) return 1 +cdef class _AttribIterator: + """Attribute iterator - for internal use only! + """ + # XML attributes must not be removed while running! 
+ cdef _Element _node + cdef xmlAttr* _c_attr + cdef int _keysvalues # 1 - keys, 2 - values, 3 - items (key, value) + def __iter__(self): + return self + + def __next__(self): + cdef xmlAttr* c_attr + if self._node is None: + raise StopIteration + c_attr = self._c_attr + while c_attr is not NULL and c_attr.type != tree.XML_ATTRIBUTE_NODE: + c_attr = c_attr.next + if c_attr is NULL: + self._node = None + raise StopIteration + + self._c_attr = c_attr.next + if self._keysvalues == 1: + return _namespacedName(c_attr) + elif self._keysvalues == 2: + return _attributeValue(self._node._c_node, c_attr) + else: + return (_namespacedName(c_attr), + _attributeValue(self._node._c_node, c_attr)) + +cdef object _attributeIteratorFactory(_Element element, int keysvalues): + cdef _AttribIterator attribs + if element._c_node.properties is NULL: + return ITER_EMPTY + attribs = _AttribIterator() + attribs._node = element + attribs._c_attr = element._c_node.properties + attribs._keysvalues = keysvalues + return attribs + + ctypedef xmlNode* (*_node_to_node_function)(xmlNode*) cdef public class _ElementTagMatcher [ object LxmlElementTagMatcher, Modified: lxml/trunk/src/lxml/etreepublic.pxd ============================================================================== --- lxml/trunk/src/lxml/etreepublic.pxd (original) +++ lxml/trunk/src/lxml/etreepublic.pxd Tue Oct 24 08:46:06 2006 @@ -103,6 +103,10 @@ # return the value of attribute "{ns}name", or the default value cdef object getAttributeValue(_NodeBase element, key, default) + # return an iterator over attribute names (1), values (2) or items (3) + # attributes must not be removed during iteration! 
+ cdef object iterattributes(_Element element, int keysvalues) + # set an attribute value on an element # on failure, sets an exception and returns -1 cdef int setAttributeValue(_NodeBase element, key, value) except -1 Modified: lxml/trunk/src/lxml/objectify.pyx ============================================================================== --- lxml/trunk/src/lxml/objectify.pyx (original) +++ lxml/trunk/src/lxml/objectify.pyx Tue Oct 24 08:46:06 2006 @@ -941,7 +941,7 @@ value, type(element).__name__) xsi_ns = "{%s}" % XML_SCHEMA_INSTANCE_NS pytype_ns = "{%s}" % PYTYPE_NAMESPACE - for name, value in element.items(): + for name, value in cetree.iterattributes(element, 3): if name == PYTYPE_ATTRIBUTE and value == TREE_PYTYPE: continue name = name.replace(xsi_ns, 'xsi:').replace(pytype_ns, 'py:') Modified: lxml/trunk/src/lxml/public-api.pxi ============================================================================== --- lxml/trunk/src/lxml/public-api.pxi (original) +++ lxml/trunk/src/lxml/public-api.pxi Tue Oct 24 08:46:06 2006 @@ -80,6 +80,9 @@ cdef public object getAttributeValue(_NodeBase element, key, default): return _getAttributeValue(element, key, default) +cdef public object iterattributes(_Element element, int keysvalues): + return _attributeIteratorFactory(element, keysvalues) + cdef public int setAttributeValue(_NodeBase element, key, value) except -1: return _setAttributeValue(element, key, value) From scoder at codespeak.net Tue Oct 24 09:13:51 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Tue, 24 Oct 2006 09:13:51 +0200 (CEST) Subject: [Lxml-checkins] r33629 - lxml/trunk/src/lxml Message-ID: <20061024071351.1F57B10068@code0.codespeak.net> Author: scoder Date: Tue Oct 24 09:13:49 2006 New Revision: 33629 Modified: lxml/trunk/src/lxml/etree.pyx Log: cleaned up _Attrib.__repr__() Modified: lxml/trunk/src/lxml/etree.pyx ============================================================================== --- lxml/trunk/src/lxml/etree.pyx 
(original) +++ lxml/trunk/src/lxml/etree.pyx Tue Oct 24 09:13:49 2006 @@ -1309,10 +1309,7 @@ # ACCESSORS def __repr__(self): - result = {} - for key, value in self.items(): - result[key] = value - return repr(result) + return repr(dict( self.items() )) def __getitem__(self, key): result = _getAttributeValue(self._element, key, None) From scoder at codespeak.net Tue Oct 24 09:17:42 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Tue, 24 Oct 2006 09:17:42 +0200 (CEST) Subject: [Lxml-checkins] r33630 - lxml/trunk/src/lxml Message-ID: <20061024071742.ABBA810068@code0.codespeak.net> Author: scoder Date: Tue Oct 24 09:17:40 2006 New Revision: 33630 Modified: lxml/trunk/src/lxml/etree.pyx Log: cleaned up _Attrib.__repr__() again Modified: lxml/trunk/src/lxml/etree.pyx ============================================================================== --- lxml/trunk/src/lxml/etree.pyx (original) +++ lxml/trunk/src/lxml/etree.pyx Tue Oct 24 09:17:40 2006 @@ -1309,7 +1309,7 @@ # ACCESSORS def __repr__(self): - return repr(dict( self.items() )) + return repr(dict( _attributeIteratorFactory(self._element, 3) )) def __getitem__(self, key): result = _getAttributeValue(self._element, key, None) From scoder at codespeak.net Wed Oct 25 08:49:25 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Wed, 25 Oct 2006 08:49:25 +0200 (CEST) Subject: [Lxml-checkins] r33705 - lxml/trunk/doc Message-ID: <20061025064925.D8EC01005A@code0.codespeak.net> Author: scoder Date: Wed Oct 25 08:49:24 2006 New Revision: 33705 Modified: lxml/trunk/doc/FAQ.txt Log: FAQ entry on objectify performance tweaking Modified: lxml/trunk/doc/FAQ.txt ============================================================================== --- lxml/trunk/doc/FAQ.txt (original) +++ lxml/trunk/doc/FAQ.txt Wed Oct 25 08:49:24 2006 @@ -12,9 +12,8 @@ 1 General Questions 1.1 Is there a tutorial? 1.2 Where can I find more documentation about lxml? 
- 1.3 What is the difference between lxml.etree and lxml.objectify? - 1.4 Why is my application so slow? - 1.5 Why do I get errors about missing UCS4 symbols when installing lxml? + 1.3 Why is my application so slow? + 1.4 Why do I get errors about missing UCS4 symbols when installing lxml? 2 Bugs 2.1 My application crashes! Why does lxml.etree do that? 2.2 I think I have found a bug in lxml. What should I do? @@ -31,6 +30,9 @@ 5.2 Why doesn't ``findall()`` support full XPath expressions? 5.3 How can I find out which namespace prefixes are used in a document? 5.4 How can I specify a default namespace for XPath expressions? + 6 lxml.objectify + 6.1 What is the difference between lxml.etree and lxml.objectify? + 6.2 Is there a way to speed up frequent element access? General Questions @@ -62,30 +64,6 @@ .. _`the web page`: http://codespeak.net/lxml/#documentation -What is the difference between lxml.etree and lxml.objectify? -------------------------------------------------------------- - -The two modules provide different ways of handling XML. However, objectify -builds on top of lxml.etree and therefore inherits most of its capabilities -and a large portion of its API. - -* lxml.etree is a generic API for XML and HTML handling. It aims for - ElementTree compatibility_ and supports the entire XML infoset. It is well - suited for both mixed content and data centric XML. Its generality makes it - the best choice for most applications. - -* lxml.objectify is a specialized API for XML data handling in a Python object - syntax. It provides a very natural way to deal with data fields stored in a - structurally well defined XML format. Data is automatically converted to - Python data types and can be manipulated with normal Python operators. Look - at the examples in the `objectify documentation`_ to see what it feels like - to use it. - - Objectify is not well suited for mixed contents or HTML documents. 
As it is - built on top of lxml.etree, however, it inherits the normal support for - XPath, XSLT or validation. - - Why is my application so slow? ------------------------------ @@ -178,7 +156,7 @@ Due to the way libxslt handles threading, concurrent access to stylesheets is currently only possible if it was parsed in the main thread. Parsing and -using a stylesheet inside one thread also works. +applying a stylesheet inside one thread also works. Warning: You should generally avoid modifying trees in other threads than the one it was generated in. Although this should work in many cases, there are @@ -200,10 +178,10 @@ The global interpreter lock (GIL) in Python serializes access to the interpreter, so if the majority of your processing is done in Python code -(traversing trees, modifying elements, etc.), your gain will be close to 0. -The more of your XML processing moves into lxml, however, the higher your -gain. If your application is bound by XML parsing and serialisation, or by -complex XSLTs, your speedup on multi-processor machines can be substantial. +(walking trees, modifying elements, etc.), your gain will be close to 0. The +more of your XML processing moves into lxml, however, the higher your gain. +If your application is bound by XML parsing and serialisation, or by complex +XSLTs, your speedup on multi-processor machines can be substantial. See the question above to learn which operations free the GIL to support multi-threading. @@ -347,3 +325,78 @@ You can't. In XPath, there is no such thing as a default namespace. Just use an arbitrary prefix and let the namespace dictionary of the XPath evaluators map it to your namespace. See also the question above. + + +lxml.objectify +============== + +What is the difference between lxml.etree and lxml.objectify? +------------------------------------------------------------- + +The two modules provide different ways of handling XML. 
However, objectify +builds on top of lxml.etree and therefore inherits most of its capabilities +and a large portion of its API. + +* lxml.etree is a generic API for XML and HTML handling. It aims for + ElementTree compatibility_ and supports the entire XML infoset. It is well + suited for both mixed content and data centric XML. Its generality makes it + the best choice for most applications. + +* lxml.objectify is a specialized API for XML data handling in a Python object + syntax. It provides a very natural way to deal with data fields stored in a + structurally well defined XML format. Data is automatically converted to + Python data types and can be manipulated with normal Python operators. Look + at the examples in the `objectify documentation`_ to see what it feels like + to use it. + + Objectify is not well suited for mixed contents or HTML documents. As it is + built on top of lxml.etree, however, it inherits the normal support for + XPath, XSLT or validation. + +Is there a way to speed up frequent element access? +--------------------------------------------------- + +lxml.objectify creates Python representations of elements on the fly. To save +memory, the normal Python garbage collection mechanisms will discard them when +their last reference is gone. In cases where deeply nested elements are +frequently accessed through the objectify API, the create-discard cycles can +become a bottleneck, as elements have to be instantiated over and over again. + +If your benchmarks prove that the overhead is too high for your specific use +case, here are some things to try: + +* If you often work in subtrees, assign the parent of the subtree to a + variable or pass it into functions instead of starting at the root. This + allows accessing its descendants more directly. + +* Use precompiled ObjectPath expressions instead of accessing deeply nested + elements step-by-step via object attributes.
+ +* Try assigning data values directly to attributes instead of passing them + through DataElement. + +* Run ``objectify.annotate()`` over read-only trees to speed up the attribute + type inference on access. + +* To prevent frequent object create-discard cycles, you can keep a permanent + reference to the Python objects in a tree. Just create a cache dictionary + and run:: + + cache[root] = list(root.getiterator()) + + after parsing and:: + + del cache[root] + + when you are done with the tree. This will keep the Python element + representations of all elements alive and thus avoid the overhead of + repeated Python object creation. By choosing the right trees (or even + elements) to cache, you can trade memory usage against access speed. + + Things to note: you cannot currently use ``weakref.WeakKeyDictionary`` + objects for this as lxml's elements do not support weak references for + memory reasons. Also note that new element objects that you add to these + trees will not turn up in the cache automatically and will therefore still + be garbage collected when all their Python references are gone, so this is + most effective for largely immutable trees. You should consider using a set + instead of a list in this case and add new elements by hand. From scoder at codespeak.net Wed Oct 25 09:48:33 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Wed, 25 Oct 2006 09:48:33 +0200 (CEST) Subject: [Lxml-checkins] r33706 - in lxml/trunk: . 
src/lxml Message-ID: <20061025074833.36E5B10050@code0.codespeak.net> Author: scoder Date: Wed Oct 25 09:48:30 2006 New Revision: 33706 Modified: lxml/trunk/CHANGES.txt lxml/trunk/src/lxml/docloader.pxi lxml/trunk/src/lxml/parser.pxi lxml/trunk/src/lxml/xslt.pxi Log: clear resolver context after use Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Wed Oct 25 09:48:30 2006 @@ -18,6 +18,9 @@ Bugs fixed ---------- +* Open files and XML strings returned by Python resolvers were not + closed/freed + * Copying Comments and ProcessingInstructions failed * Memory leak for external URLs in _XSLTProcessingInstruction.parseXSL() Modified: lxml/trunk/src/lxml/docloader.pxi ============================================================================== --- lxml/trunk/src/lxml/docloader.pxi (original) +++ lxml/trunk/src/lxml/docloader.pxi Wed Oct 25 09:48:30 2006 @@ -98,3 +98,7 @@ _ExceptionContext.__init__(self) self._resolvers = resolvers self._storage = _TempStore() + + cdef void clear(self): + _ExceptionContext.clear(self) + self._storage.clear() Modified: lxml/trunk/src/lxml/parser.pxi ============================================================================== --- lxml/trunk/src/lxml/parser.pxi (original) +++ lxml/trunk/src/lxml/parser.pxi Wed Oct 25 09:48:30 2006 @@ -461,6 +461,7 @@ recover = self._parse_options & xmlparser.XML_PARSE_RECOVER return _handleParseResult(pctxt, result, None, recover) finally: + self._context.clear() self._error_log.disconnect() self._unlockParser() @@ -492,6 +493,7 @@ recover = self._parse_options & xmlparser.XML_PARSE_RECOVER return _handleParseResult(pctxt, result, None, recover) finally: + self._context.clear() self._error_log.disconnect() self._unlockParser() @@ -519,6 +521,7 @@ recover = self._parse_options & xmlparser.XML_PARSE_RECOVER return _handleParseResult(pctxt, result, c_filename, recover) finally: + 
self._context.clear() self._error_log.disconnect() self._unlockParser() @@ -542,6 +545,7 @@ recover = self._parse_options & xmlparser.XML_PARSE_RECOVER return _handleParseResult(pctxt, result, filename, recover) finally: + self._context.clear() self._error_log.disconnect() self._unlockParser() Modified: lxml/trunk/src/lxml/xslt.pxi ============================================================================== --- lxml/trunk/src/lxml/xslt.pxi (original) +++ lxml/trunk/src/lxml/xslt.pxi Wed Oct 25 09:48:30 2006 @@ -425,23 +425,26 @@ _destroyFakeDoc(input_doc._c_doc, c_doc) self._error_log.disconnect() - if self._xslt_resolver_context._has_raised(): - if c_result is not NULL: - tree.xmlFreeDoc(c_result) - self._xslt_resolver_context._raise_if_stored() - - if c_result is NULL: - error = self._error_log.last_error - if error is not None and error.message: - if error.line >= 0: - message = "%s, line %d" % (error.message, error.line) + try: + if self._xslt_resolver_context._has_raised(): + if c_result is not NULL: + tree.xmlFreeDoc(c_result) + self._xslt_resolver_context._raise_if_stored() + + if c_result is NULL: + error = self._error_log.last_error + if error is not None and error.message: + if error.line >= 0: + message = "%s, line %d" % (error.message, error.line) + else: + message = error.message + elif error.line >= 0: + message = "Error applying stylesheet, line %d" % error.line else: - message = error.message - elif error.line >= 0: - message = "Error applying stylesheet, line %d" % error.line - else: - message = "Error applying stylesheet" - raise XSLTApplyError, message + message = "Error applying stylesheet" + raise XSLTApplyError, message + finally: + self._xslt_resolver_context.clear() result_doc = _documentFactory(c_result, input_doc._parser) return _xsltResultTreeFactory(result_doc, self, profile_doc) From scoder at codespeak.net Wed Oct 25 09:55:31 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Wed, 25 Oct 2006 09:55:31 +0200 (CEST) 
Subject: [Lxml-checkins] r33707 - in lxml/branch/lxml-1.1: . doc src/lxml Message-ID: <20061025075531.4E7EC10053@code0.codespeak.net> Author: scoder Date: Wed Oct 25 09:55:26 2006 New Revision: 33707 Modified: lxml/branch/lxml-1.1/CHANGES.txt lxml/branch/lxml-1.1/doc/FAQ.txt lxml/branch/lxml-1.1/doc/build.txt lxml/branch/lxml-1.1/src/lxml/docloader.pxi lxml/branch/lxml-1.1/src/lxml/etree.pyx lxml/branch/lxml-1.1/src/lxml/etreepublic.pxd lxml/branch/lxml-1.1/src/lxml/objectify.pyx lxml/branch/lxml-1.1/src/lxml/parser.pxi lxml/branch/lxml-1.1/src/lxml/public-api.pxi lxml/branch/lxml-1.1/src/lxml/xmlid.pxi lxml/branch/lxml-1.1/src/lxml/xslt.pxi Log: big merge from trunk: xmlid fixes, _Attrib cleanup, FAQ about objectify, resolver context cleanup Modified: lxml/branch/lxml-1.1/CHANGES.txt ============================================================================== --- lxml/branch/lxml-1.1/CHANGES.txt (original) +++ lxml/branch/lxml-1.1/CHANGES.txt Wed Oct 25 09:55:26 2006 @@ -18,6 +18,9 @@ Bugs fixed ---------- +* Open files and XML strings returned by Python resolvers were not + closed/freed + * Copying Comments and ProcessingInstructions failed * Memory leak for external URLs in _XSLTProcessingInstruction.parseXSL() Modified: lxml/branch/lxml-1.1/doc/FAQ.txt ============================================================================== --- lxml/branch/lxml-1.1/doc/FAQ.txt (original) +++ lxml/branch/lxml-1.1/doc/FAQ.txt Wed Oct 25 09:55:26 2006 @@ -12,9 +12,8 @@ 1 General Questions 1.1 Is there a tutorial? 1.2 Where can I find more documentation about lxml? - 1.3 What is the difference between lxml.etree and lxml.objectify? - 1.4 Why is my application so slow? - 1.5 Why do I get errors about missing UCS4 symbols when installing lxml? + 1.3 Why is my application so slow? + 1.4 Why do I get errors about missing UCS4 symbols when installing lxml? 2 Bugs 2.1 My application crashes! Why does lxml.etree do that? 2.2 I think I have found a bug in lxml. What should I do? 
@@ -31,6 +30,9 @@ 5.2 Why doesn't ``findall()`` support full XPath expressions? 5.3 How can I find out which namespace prefixes are used in a document? 5.4 How can I specify a default namespace for XPath expressions? + 6 lxml.objectify + 6.1 What is the difference between lxml.etree and lxml.objectify? + 6.2 Is there a way to speed up frequent element access? General Questions @@ -62,30 +64,6 @@ .. _`the web page`: http://codespeak.net/lxml/#documentation -What is the difference between lxml.etree and lxml.objectify? -------------------------------------------------------------- - -The two modules provide different ways of handling XML. However, objectify -builds on top of lxml.etree and therefore inherits most of its capabilities -and a large portion of its API. - -* lxml.etree is a generic API for XML and HTML handling. It aims for - ElementTree compatibility_ and supports the entire XML infoset. It is well - suited for both mixed content and data centric XML. Its generality makes it - the best choice for most applications. - -* lxml.objectify is a specialized API for XML data handling in a Python object - syntax. It provides a very natural way to deal with data fields stored in a - structurally well defined XML format. Data is automatically converted to - Python data types and can be manipulated with normal Python operators. Look - at the examples in the `objectify documentation`_ to see what it feels like - to use it. - - Objectify is not well suited for mixed contents or HTML documents. As it is - built on top of lxml.etree, however, it inherits the normal support for - XPath, XSLT or validation. - - Why is my application so slow? ------------------------------ @@ -178,7 +156,7 @@ Due to the way libxslt handles threading, concurrent access to stylesheets is currently only possible if it was parsed in the main thread. Parsing and -using a stylesheet inside one thread also works. +applying a stylesheet inside one thread also works. 
Warning: You should generally avoid modifying trees in other threads than the one it was generated in. Although this should work in many cases, there are @@ -200,10 +178,10 @@ The global interpreter lock (GIL) in Python serializes access to the interpreter, so if the majority of your processing is done in Python code -(traversing trees, modifying elements, etc.), your gain will be close to 0. -The more of your XML processing moves into lxml, however, the higher your -gain. If your application is bound by XML parsing and serialisation, or by -complex XSLTs, your speedup on multi-processor machines can be substantial. +(walking trees, modifying elements, etc.), your gain will be close to 0. The +more of your XML processing moves into lxml, however, the higher your gain. +If your application is bound by XML parsing and serialisation, or by complex +XSLTs, your speedup on multi-processor machines can be substantial. See the question above to learn which operations free the GIL to support multi-threading. @@ -347,3 +325,78 @@ You can't. In XPath, there is no such thing as a default namespace. Just use an arbitrary prefix and let the namespace dictionary of the XPath evaluators map it to your namespace. See also the question above. + + +lxml.objectify +============== + +What is the difference between lxml.etree and lxml.objectify? +------------------------------------------------------------- + +The two modules provide different ways of handling XML. However, objectify +builds on top of lxml.etree and therefore inherits most of its capabilities +and a large portion of its API. + +* lxml.etree is a generic API for XML and HTML handling. It aims for + ElementTree compatibility_ and supports the entire XML infoset. It is well + suited for both mixed content and data centric XML. Its generality makes it + the best choice for most applications. + +* lxml.objectify is a specialized API for XML data handling in a Python object + syntax. 
It provides a very natural way to deal with data fields stored in a + structurally well defined XML format. Data is automatically converted to + Python data types and can be manipulated with normal Python operators. Look + at the examples in the `objectify documentation`_ to see what it feels like + to use it. + + Objectify is not well suited for mixed contents or HTML documents. As it is + built on top of lxml.etree, however, it inherits the normal support for + XPath, XSLT or validation. + +Is there a way to speed up frequent element access? +--------------------------------------------------- + +lxml.objectify creates Python representations of elements on the fly. To save +memory, the normal Python garbage collection mechanisms will discard them when +their last reference is gone. In cases where deeply nested elements are +frequently accessed through the objectify API, the create-discard cycles can +become a bottleneck, as elements have to be instantiated over and over again. + +If your benchmarks prove that the overhead is too high for your specific use +case, here are some things to try: + +* If you often work in subtrees, assign the parent of the subtree to a + variable or pass it into functions instead of starting at the root. This + allows accessing its descendants more directly. + +* Use precompiled ObjectPath expressions instead of accessing deeply nested + elements step-by-step via object attributes. + +* Try assigning data values directly to attributes instead of passing them + through DataElement. + +* Run ``objectify.annotate()`` over read-only trees to speed up the attribute + type inference on access. + +* To prevent frequent object create-discard cycles, you can keep a permanent + reference to the Python objects in a tree. Just create a cache dictionary + and run:: + + cache[root] = list(root.getiterator()) + + after parsing and:: + + del cache[root] + + when you are done with the tree.
This will keep the Python element + representations of all elements alive and thus avoid the overhead of + repeated Python object creation. By choosing the right trees (or even + elements) to cache, you can trade memory usage against access speed. + + Things to note: you cannot currently use ``weakref.WeakKeyDictionary`` + objects for this as lxml's elements do not support weak references for + memory reasons. Also note that new element objects that you add to these + trees will not turn up in the cache automatically and will therefore still + be garbage collected when all their Python references are gone, so this is + most effective for largely immutable trees. You should consider using a set + instead of a list in this case and add new elements by hand. Modified: lxml/branch/lxml-1.1/doc/build.txt ============================================================================== --- lxml/branch/lxml-1.1/doc/build.txt (original) +++ lxml/branch/lxml-1.1/doc/build.txt Wed Oct 25 09:55:26 2006 @@ -224,11 +224,11 @@ * check md5sum of created tar.gz file and place new sum and size in dsc file * do ``dpkg-source -x lxml-...dsc`` and cd into the newly created directory * run ``dch -i`` and add a comment like "use trunk version", this will - increase the debian version number so apt/dpkg don't get confused + increase the debian version number so apt/dpkg won't get confused * run ``dpkg-buildpackage -rfakeroot -us -uc`` to build the package -Eventually dpkg-buildpackage will tell you that some dependecies are missing, -you can either install them manually or run apt-get build-dep lxml +In case ``dpkg-buildpackage`` tells you that some dependencies are missing, you +can either install them manually or run ``apt-get build-dep lxml``. That will give you .deb packages in the parent directory which can be installed using ``dpkg -i``.
Modified: lxml/branch/lxml-1.1/src/lxml/docloader.pxi ============================================================================== --- lxml/branch/lxml-1.1/src/lxml/docloader.pxi (original) +++ lxml/branch/lxml-1.1/src/lxml/docloader.pxi Wed Oct 25 09:55:26 2006 @@ -98,3 +98,7 @@ _ExceptionContext.__init__(self) self._resolvers = resolvers self._storage = _TempStore() + + cdef void clear(self): + _ExceptionContext.clear(self) + self._storage.clear() Modified: lxml/branch/lxml-1.1/src/lxml/etree.pyx ============================================================================== --- lxml/branch/lxml-1.1/src/lxml/etree.pyx (original) +++ lxml/branch/lxml-1.1/src/lxml/etree.pyx Wed Oct 25 09:55:26 2006 @@ -24,6 +24,9 @@ cdef object super super = __builtin__.super +cdef object StopIteration +StopIteration = __builtin__.StopIteration + del __builtin__ cdef object _elementpath @@ -41,6 +44,9 @@ except ImportError: pass +cdef object ITER_EMPTY +ITER_EMPTY = iter(()) + # the rules # any libxml C argument/variable is prefixed with c_ # any non-public function/class is prefixed with an underscore @@ -1052,13 +1058,13 @@ """Gets a list of attribute names. The names are returned in an arbitrary order (just like for an ordinary Python dictionary). """ - return self.attrib.keys() + return python.PySequence_List( _attributeIteratorFactory(self, 1) ) def items(self): """Gets element attributes, as a sequence. The attributes are returned in an arbitrary order. """ - return self.attrib.items() + return python.PySequence_List( _attributeIteratorFactory(self, 3) ) def getchildren(self): """Returns all subelements. The elements are returned in document order. 
@@ -1302,10 +1308,7 @@ # ACCESSORS def __repr__(self): - result = {} - for key, value in self.items(): - result[key] = value - return repr(result) + return repr(dict( _attributeIteratorFactory(self._element, 3) )) def __getitem__(self, key): result = _getAttributeValue(self._element, key, None) @@ -1338,17 +1341,8 @@ return _getAttributeValue(self._element, key, default) def keys(self): - cdef xmlNode* c_node - cdef xmlAttr* c_attr - c_node = self._element._c_node - c_attr = c_node.properties - result = [] - while c_attr is not NULL: - if c_attr.type == tree.XML_ATTRIBUTE_NODE: - python.PyList_Append( - result, _namespacedName(c_attr)) - c_attr = c_attr.next - return result + return python.PySequence_List( + _attributeIteratorFactory(self._element, 1) ) def __iter__(self): return iter(self.keys()) @@ -1357,35 +1351,15 @@ return iter(self.keys()) def values(self): - cdef xmlNode* c_node - cdef xmlAttr* c_attr - c_node = self._element._c_node - c_attr = c_node.properties - result = [] - while c_attr is not NULL: - if c_attr.type == tree.XML_ATTRIBUTE_NODE: - python.PyList_Append( - result, _attributeValue(c_node, c_attr)) - c_attr = c_attr.next - return result + return python.PySequence_List( + _attributeIteratorFactory(self._element, 2) ) def itervalues(self): return iter(self.values()) def items(self): - result = [] - cdef xmlNode* c_node - cdef xmlAttr* c_attr - c_node = self._element._c_node - c_attr = c_node.properties - while c_attr is not NULL: - if c_attr.type == tree.XML_ATTRIBUTE_NODE: - python.PyList_Append(result, ( - _namespacedName(c_attr), - _attributeValue(c_node, c_attr) - )) - c_attr = c_attr.next - return result + return python.PySequence_List( + _attributeIteratorFactory(self._element, 3) ) def iteritems(self): return iter(self.items()) @@ -1413,6 +1387,47 @@ tree.xmlFree(c_result) return 1 +cdef class _AttribIterator: + """Attribute iterator - for internal use only! + """ + # XML attributes must not be removed while running! 
+ cdef _Element _node + cdef xmlAttr* _c_attr + cdef int _keysvalues # 1 - keys, 2 - values, 3 - items (key, value) + def __iter__(self): + return self + + def __next__(self): + cdef xmlAttr* c_attr + if self._node is None: + raise StopIteration + c_attr = self._c_attr + while c_attr is not NULL and c_attr.type != tree.XML_ATTRIBUTE_NODE: + c_attr = c_attr.next + if c_attr is NULL: + self._node = None + raise StopIteration + + self._c_attr = c_attr.next + if self._keysvalues == 1: + return _namespacedName(c_attr) + elif self._keysvalues == 2: + return _attributeValue(self._node._c_node, c_attr) + else: + return (_namespacedName(c_attr), + _attributeValue(self._node._c_node, c_attr)) + +cdef object _attributeIteratorFactory(_Element element, int keysvalues): + cdef _AttribIterator attribs + if element._c_node.properties is NULL: + return ITER_EMPTY + attribs = _AttribIterator() + attribs._node = element + attribs._c_attr = element._c_node.properties + attribs._keysvalues = keysvalues + return attribs + + ctypedef xmlNode* (*_node_to_node_function)(xmlNode*) cdef public class _ElementTagMatcher [ object LxmlElementTagMatcher, Modified: lxml/branch/lxml-1.1/src/lxml/etreepublic.pxd ============================================================================== --- lxml/branch/lxml-1.1/src/lxml/etreepublic.pxd (original) +++ lxml/branch/lxml-1.1/src/lxml/etreepublic.pxd Wed Oct 25 09:55:26 2006 @@ -103,6 +103,10 @@ # return the value of attribute "{ns}name", or the default value cdef object getAttributeValue(_NodeBase element, key, default) + # return an iterator over attribute names (1), values (2) or items (3) + # attributes must not be removed during iteration! 
+ cdef object iterattributes(_Element element, int keysvalues) + # set an attribute value on an element # on failure, sets an exception and returns -1 cdef int setAttributeValue(_NodeBase element, key, value) except -1 Modified: lxml/branch/lxml-1.1/src/lxml/objectify.pyx ============================================================================== --- lxml/branch/lxml-1.1/src/lxml/objectify.pyx (original) +++ lxml/branch/lxml-1.1/src/lxml/objectify.pyx Wed Oct 25 09:55:26 2006 @@ -941,7 +941,7 @@ value, type(element).__name__) xsi_ns = "{%s}" % XML_SCHEMA_INSTANCE_NS pytype_ns = "{%s}" % PYTYPE_NAMESPACE - for name, value in element.items(): + for name, value in cetree.iterattributes(element, 3): if name == PYTYPE_ATTRIBUTE and value == TREE_PYTYPE: continue name = name.replace(xsi_ns, 'xsi:').replace(pytype_ns, 'py:') Modified: lxml/branch/lxml-1.1/src/lxml/parser.pxi ============================================================================== --- lxml/branch/lxml-1.1/src/lxml/parser.pxi (original) +++ lxml/branch/lxml-1.1/src/lxml/parser.pxi Wed Oct 25 09:55:26 2006 @@ -461,6 +461,7 @@ recover = self._parse_options & xmlparser.XML_PARSE_RECOVER return _handleParseResult(pctxt, result, None, recover) finally: + self._context.clear() self._error_log.disconnect() self._unlockParser() @@ -492,6 +493,7 @@ recover = self._parse_options & xmlparser.XML_PARSE_RECOVER return _handleParseResult(pctxt, result, None, recover) finally: + self._context.clear() self._error_log.disconnect() self._unlockParser() @@ -519,6 +521,7 @@ recover = self._parse_options & xmlparser.XML_PARSE_RECOVER return _handleParseResult(pctxt, result, c_filename, recover) finally: + self._context.clear() self._error_log.disconnect() self._unlockParser() @@ -542,6 +545,7 @@ recover = self._parse_options & xmlparser.XML_PARSE_RECOVER return _handleParseResult(pctxt, result, filename, recover) finally: + self._context.clear() self._error_log.disconnect() self._unlockParser() Modified: 
lxml/branch/lxml-1.1/src/lxml/public-api.pxi ============================================================================== --- lxml/branch/lxml-1.1/src/lxml/public-api.pxi (original) +++ lxml/branch/lxml-1.1/src/lxml/public-api.pxi Wed Oct 25 09:55:26 2006 @@ -80,6 +80,9 @@ cdef public object getAttributeValue(_NodeBase element, key, default): return _getAttributeValue(element, key, default) +cdef public object iterattributes(_Element element, int keysvalues): + return _attributeIteratorFactory(element, keysvalues) + cdef public int setAttributeValue(_NodeBase element, key, value) except -1: return _setAttributeValue(element, key, value) Modified: lxml/branch/lxml-1.1/src/lxml/xmlid.pxi ============================================================================== --- lxml/branch/lxml-1.1/src/lxml/xmlid.pxi (original) +++ lxml/branch/lxml-1.1/src/lxml/xmlid.pxi Wed Oct 25 09:55:26 2006 @@ -47,7 +47,7 @@ The dictionary must be instantiated with the root element of a parsed XML document, otherwise the behaviour is undefined. Elements and XML trees - that were created or modified through the API are not supported. + that were created or modified 'by hand' are not supported. 
""" cdef _Document _doc cdef object _keys @@ -89,7 +89,7 @@ return c_id is not NULL def has_key(self, id_name): - return self.__contains__(id_name) + return id_name in self def __cmp__(self, other): if other is None: @@ -113,68 +113,59 @@ return repr(dict(self)) def keys(self): - keys = self._keys - if keys is not None: - return python.PySequence_List(keys) - keys = self._build_keys() - self._keys = python.PySequence_Tuple(keys) - return keys + if self._keys is None: + self._keys = self._build_keys() + return self._keys[:] def __iter__(self): - keys = self._keys - if keys is None: - keys = self.keys() - return iter(keys) + if self._keys is None: + self._keys = self._build_keys() + return iter(self._keys) def iterkeys(self): - return self.__iter__() + return self def __len__(self): - keys = self._keys - if keys is None: - keys = self.keys() - return len(keys) - - cdef object _build_keys(self): - keys = [] - tree.xmlHashScan(self._doc._c_doc.ids, - _collectIdHashKeys, keys) - return keys + if self._keys is None: + self._keys = self._build_keys() + return len(self._keys) def items(self): - items = self._items - if items is not None: - return python.PySequence_List(items) - items = self._build_items() - self._items = python.PySequence_Tuple(items) - return items + if self._items is None: + self._items = self._build_items() + return self._items[:] def iteritems(self): - items = self._items - if items is None: - items = self.items() - return iter(items) - - cdef object _build_items(self): - items = [] - context = (items, self._doc) - tree.xmlHashScan(self._doc._c_doc.ids, - _collectIdHashItemList, context) - return items + if self._items is None: + self._items = self._build_items() + return iter(self._items) def values(self): - items = self._items - if items is None: - items = self.items() + if self._items is None: + self._items = self._build_items() values = [] - for item in items: + for item in self._items: value = python.PyTuple_GET_ITEM(item, 1) + 
python.Py_INCREF(value) python.PyList_Append(values, value) return values def itervalues(self): return iter(self.values()) + cdef object _build_keys(self): + keys = [] + tree.xmlHashScan(self._doc._c_doc.ids, + _collectIdHashKeys, keys) + return keys + + cdef object _build_items(self): + items = [] + context = (items, self._doc) + tree.xmlHashScan(self._doc._c_doc.ids, + _collectIdHashItemList, context) + return items + cdef void _collectIdHashItemDict(void* payload, void* context, char* name): # collect elements from ID attribute hash table cdef tree.xmlID* c_id Modified: lxml/branch/lxml-1.1/src/lxml/xslt.pxi ============================================================================== --- lxml/branch/lxml-1.1/src/lxml/xslt.pxi (original) +++ lxml/branch/lxml-1.1/src/lxml/xslt.pxi Wed Oct 25 09:55:26 2006 @@ -425,23 +425,26 @@ _destroyFakeDoc(input_doc._c_doc, c_doc) self._error_log.disconnect() - if self._xslt_resolver_context._has_raised(): - if c_result is not NULL: - tree.xmlFreeDoc(c_result) - self._xslt_resolver_context._raise_if_stored() - - if c_result is NULL: - error = self._error_log.last_error - if error is not None and error.message: - if error.line >= 0: - message = "%s, line %d" % (error.message, error.line) + try: + if self._xslt_resolver_context._has_raised(): + if c_result is not NULL: + tree.xmlFreeDoc(c_result) + self._xslt_resolver_context._raise_if_stored() + + if c_result is NULL: + error = self._error_log.last_error + if error is not None and error.message: + if error.line >= 0: + message = "%s, line %d" % (error.message, error.line) + else: + message = error.message + elif error.line >= 0: + message = "Error applying stylesheet, line %d" % error.line else: - message = error.message - elif error.line >= 0: - message = "Error applying stylesheet, line %d" % error.line - else: - message = "Error applying stylesheet" - raise XSLTApplyError, message + message = "Error applying stylesheet" + raise XSLTApplyError, message + finally: + 
self._xslt_resolver_context.clear() result_doc = _documentFactory(c_result, input_doc._parser) return _xsltResultTreeFactory(result_doc, self, profile_doc) From scoder at codespeak.net Wed Oct 25 09:57:09 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Wed, 25 Oct 2006 09:57:09 +0200 (CEST) Subject: [Lxml-checkins] r33708 - lxml/branch/lxml-1.1 Message-ID: <20061025075709.9A30310053@code0.codespeak.net> Author: scoder Date: Wed Oct 25 09:57:08 2006 New Revision: 33708 Modified: lxml/branch/lxml-1.1/version.txt Log: set version to 1.1.2 Modified: lxml/branch/lxml-1.1/version.txt ============================================================================== --- lxml/branch/lxml-1.1/version.txt (original) +++ lxml/branch/lxml-1.1/version.txt Wed Oct 25 09:57:08 2006 @@ -1 +1 @@ -1.1.1 +1.1.2 From scoder at codespeak.net Wed Oct 25 18:47:22 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Wed, 25 Oct 2006 18:47:22 +0200 (CEST) Subject: [Lxml-checkins] r33724 - lxml/trunk Message-ID: <20061025164722.7AAAC10074@code0.codespeak.net> Author: scoder Date: Wed Oct 25 18:47:20 2006 New Revision: 33724 Modified: lxml/trunk/CHANGES.txt Log: mark xmlid bug fixed Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Wed Oct 25 18:47:20 2006 @@ -21,6 +21,8 @@ * Open files and XML strings returned by Python resolvers were not closed/freed +* Crash in the IDDict returned by XMLDTDID + * Copying Comments and ProcessingInstructions failed * Memory leak for external URLs in _XSLTProcessingInstruction.parseXSL() From scoder at codespeak.net Wed Oct 25 19:12:10 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Wed, 25 Oct 2006 19:12:10 +0200 (CEST) Subject: [Lxml-checkins] r33726 - lxml/trunk/src/lxml Message-ID: <20061025171210.2FE7610053@code0.codespeak.net> Author: scoder Date: Wed Oct 25 19:12:07 2006 New 
Revision: 33726 Modified: lxml/trunk/src/lxml/etree.pyx lxml/trunk/src/lxml/etree_defs.h lxml/trunk/src/lxml/python.pxd lxml/trunk/src/lxml/xmlerror.pxi Log: make repr() a C function Modified: lxml/trunk/src/lxml/etree.pyx ============================================================================== --- lxml/trunk/src/lxml/etree.pyx (original) +++ lxml/trunk/src/lxml/etree.pyx Wed Oct 25 19:12:07 2006 @@ -1,7 +1,7 @@ cimport tree, python from tree cimport xmlDoc, xmlNode, xmlAttr, xmlNs, _isElement, _getNs from python cimport isinstance, issubclass, hasattr, getattr, callable -from python cimport iter, str, _cstr, _isString, Py_ssize_t +from python cimport iter, repr, str, _cstr, _isString, Py_ssize_t cimport xpath cimport xinclude cimport c14n Modified: lxml/trunk/src/lxml/etree_defs.h ============================================================================== --- lxml/trunk/src/lxml/etree_defs.h (original) +++ lxml/trunk/src/lxml/etree_defs.h Wed Oct 25 19:12:07 2006 @@ -34,6 +34,7 @@ #define getattr(o,a) PyObject_GetAttr(o,a) #define callable(o) PyCallable_Check(o) #define str(o) PyObject_Str(o) +#define repr(o) PyObject_Repr(o) #define iter(o) PyObject_GetIter(o) #define _cstr(s) PyString_AS_STRING(s) Modified: lxml/trunk/src/lxml/python.pxd ============================================================================== --- lxml/trunk/src/lxml/python.pxd (original) +++ lxml/trunk/src/lxml/python.pxd Wed Oct 25 19:12:07 2006 @@ -93,5 +93,6 @@ cdef object getattr(object obj, object attr) cdef int callable(object obj) cdef object str(object obj) + cdef object repr(object obj) cdef object iter(object obj) cdef char* _cstr(object s) Modified: lxml/trunk/src/lxml/xmlerror.pxi ============================================================================== --- lxml/trunk/src/lxml/xmlerror.pxi (original) +++ lxml/trunk/src/lxml/xmlerror.pxi Wed Oct 25 19:12:07 2006 @@ -134,7 +134,10 @@ return iter(self._entries) def __repr__(self): - return '\n'.join(map(repr, 
self._entries)) + l = [] + for entry in self._entries: + python.PyList_Append(l, repr(entry)) + return '\n'.join(l) def __getitem__(self, index): return self._entries[index] From scoder at codespeak.net Wed Oct 25 19:15:25 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Wed, 25 Oct 2006 19:15:25 +0200 (CEST) Subject: [Lxml-checkins] r33727 - lxml/trunk Message-ID: <20061025171525.7C48A1006C@code0.codespeak.net> Author: scoder Date: Wed Oct 25 19:15:24 2006 New Revision: 33727 Modified: lxml/trunk/MANIFEST.in Log: cleanup Modified: lxml/trunk/MANIFEST.in ============================================================================== --- lxml/trunk/MANIFEST.in (original) +++ lxml/trunk/MANIFEST.in Wed Oct 25 19:15:24 2006 @@ -8,5 +8,5 @@ recursive-include src/lxml/tests *.rng *.xslt *.xml *.dtd recursive-include benchmark *.py recursive-include doc *.txt *.html *.css *.xml *.mgp pubkey.asc -recursive-include doc mkhtml.py rest2html.py +include doc/mkhtml.py doc/rest2html.py exclude doc/pyrex.txt src/lxml/etree.pxi From scoder at codespeak.net Wed Oct 25 19:16:07 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Wed, 25 Oct 2006 19:16:07 +0200 (CEST) Subject: [Lxml-checkins] r33728 - lxml/trunk Message-ID: <20061025171607.86ED71006C@code0.codespeak.net> Author: scoder Date: Wed Oct 25 19:16:06 2006 New Revision: 33728 Modified: lxml/trunk/setup.py Log: support adding local Pyrex version to source distribution Modified: lxml/trunk/setup.py ============================================================================== --- lxml/trunk/setup.py (original) +++ lxml/trunk/setup.py Wed Oct 25 19:16:06 2006 @@ -100,6 +100,8 @@ # setup etree extension building +sys.path.insert(0, os.path.join(src_dir, 'src')) # in case we use a local Pyrex + try: from Pyrex.Distutils import build_ext as build_pyx source_extension = ".pyx" From scoder at codespeak.net Wed Oct 25 19:19:03 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Wed, 25 
Oct 2006 19:19:03 +0200 (CEST) Subject: [Lxml-checkins] r33729 - lxml/trunk Message-ID: <20061025171903.015E71006C@code0.codespeak.net> Author: scoder Date: Wed Oct 25 19:19:02 2006 New Revision: 33729 Modified: lxml/trunk/setup.py Log: allow command line option --no-local-pyrex to override a Pyrex version in src/ Modified: lxml/trunk/setup.py ============================================================================== --- lxml/trunk/setup.py (original) +++ lxml/trunk/setup.py Wed Oct 25 19:19:02 2006 @@ -100,7 +100,10 @@ # setup etree extension building -sys.path.insert(0, os.path.join(src_dir, 'src')) # in case we use a local Pyrex +if '--no-local-pyrex' in sys.argv: + sys.argv.remove('--no-local-pyrex') +else: + sys.path.insert(0, os.path.join(src_dir, 'src')) # in case we use a local Pyrex try: from Pyrex.Distutils import build_ext as build_pyx From scoder at codespeak.net Wed Oct 25 19:21:50 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Wed, 25 Oct 2006 19:21:50 +0200 (CEST) Subject: [Lxml-checkins] r33730 - lxml/branch/lxml-1.1/src/lxml Message-ID: <20061025172150.AF0691006C@code0.codespeak.net> Author: scoder Date: Wed Oct 25 19:21:42 2006 New Revision: 33730 Modified: lxml/branch/lxml-1.1/src/lxml/etree.pyx lxml/branch/lxml-1.1/src/lxml/etree_defs.h lxml/branch/lxml-1.1/src/lxml/python.pxd lxml/branch/lxml-1.1/src/lxml/xmlerror.pxi Log: merge from trunk: repr() and str() as C functions Modified: lxml/branch/lxml-1.1/src/lxml/etree.pyx ============================================================================== --- lxml/branch/lxml-1.1/src/lxml/etree.pyx (original) +++ lxml/branch/lxml-1.1/src/lxml/etree.pyx Wed Oct 25 19:21:42 2006 @@ -1,7 +1,7 @@ cimport tree, python from tree cimport xmlDoc, xmlNode, xmlAttr, xmlNs, _isElement, _getNs from python cimport isinstance, issubclass, hasattr, getattr, callable -from python cimport iter, str, _cstr, _isString, Py_ssize_t +from python cimport iter, repr, str, _cstr, _isString, Py_ssize_t 
cimport xpath cimport xinclude cimport c14n Modified: lxml/branch/lxml-1.1/src/lxml/etree_defs.h ============================================================================== --- lxml/branch/lxml-1.1/src/lxml/etree_defs.h (original) +++ lxml/branch/lxml-1.1/src/lxml/etree_defs.h Wed Oct 25 19:21:42 2006 @@ -34,6 +34,7 @@ #define getattr(o,a) PyObject_GetAttr(o,a) #define callable(o) PyCallable_Check(o) #define str(o) PyObject_Str(o) +#define repr(o) PyObject_Repr(o) #define iter(o) PyObject_GetIter(o) #define _cstr(s) PyString_AS_STRING(s) Modified: lxml/branch/lxml-1.1/src/lxml/python.pxd ============================================================================== --- lxml/branch/lxml-1.1/src/lxml/python.pxd (original) +++ lxml/branch/lxml-1.1/src/lxml/python.pxd Wed Oct 25 19:21:42 2006 @@ -93,5 +93,6 @@ cdef object getattr(object obj, object attr) cdef int callable(object obj) cdef object str(object obj) + cdef object repr(object obj) cdef object iter(object obj) cdef char* _cstr(object s) Modified: lxml/branch/lxml-1.1/src/lxml/xmlerror.pxi ============================================================================== --- lxml/branch/lxml-1.1/src/lxml/xmlerror.pxi (original) +++ lxml/branch/lxml-1.1/src/lxml/xmlerror.pxi Wed Oct 25 19:21:42 2006 @@ -134,7 +134,10 @@ return iter(self._entries) def __repr__(self): - return '\n'.join(map(repr, self._entries)) + l = [] + for entry in self._entries: + python.PyList_Append(l, repr(entry)) + return '\n'.join(l) def __getitem__(self, index): return self._entries[index] From scoder at codespeak.net Wed Oct 25 19:22:10 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Wed, 25 Oct 2006 19:22:10 +0200 (CEST) Subject: [Lxml-checkins] r33731 - lxml/branch/lxml-1.1 Message-ID: <20061025172210.C24401006C@code0.codespeak.net> Author: scoder Date: Wed Oct 25 19:22:09 2006 New Revision: 33731 Modified: lxml/branch/lxml-1.1/setup.py Log: src-local Pyrex Modified: lxml/branch/lxml-1.1/setup.py 
============================================================================== --- lxml/branch/lxml-1.1/setup.py (original) +++ lxml/branch/lxml-1.1/setup.py Wed Oct 25 19:22:09 2006 @@ -100,6 +100,11 @@ # setup etree extension building +if '--no-local-pyrex' in sys.argv: + sys.argv.remove('--no-local-pyrex') +else: + sys.path.insert(0, os.path.join(src_dir, 'src')) # in case we use a local Pyrex + try: from Pyrex.Distutils import build_ext as build_pyx source_extension = ".pyx" From scoder at codespeak.net Wed Oct 25 19:22:19 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Wed, 25 Oct 2006 19:22:19 +0200 (CEST) Subject: [Lxml-checkins] r33732 - lxml/branch/lxml-1.1 Message-ID: <20061025172219.004331006C@code0.codespeak.net> Author: scoder Date: Wed Oct 25 19:22:18 2006 New Revision: 33732 Modified: lxml/branch/lxml-1.1/CHANGES.txt lxml/branch/lxml-1.1/MANIFEST.in Log: small fixes from trunk Modified: lxml/branch/lxml-1.1/CHANGES.txt ============================================================================== --- lxml/branch/lxml-1.1/CHANGES.txt (original) +++ lxml/branch/lxml-1.1/CHANGES.txt Wed Oct 25 19:22:18 2006 @@ -21,6 +21,8 @@ * Open files and XML strings returned by Python resolvers were not closed/freed +* Crash in the IDDict returned by XMLDTDID + * Copying Comments and ProcessingInstructions failed * Memory leak for external URLs in _XSLTProcessingInstruction.parseXSL() Modified: lxml/branch/lxml-1.1/MANIFEST.in ============================================================================== --- lxml/branch/lxml-1.1/MANIFEST.in (original) +++ lxml/branch/lxml-1.1/MANIFEST.in Wed Oct 25 19:22:18 2006 @@ -8,5 +8,5 @@ recursive-include src/lxml/tests *.rng *.xslt *.xml *.dtd recursive-include benchmark *.py recursive-include doc *.txt *.html *.css *.xml *.mgp pubkey.asc -recursive-include doc mkhtml.py rest2html.py +include doc/mkhtml.py doc/rest2html.py exclude doc/pyrex.txt src/lxml/etree.pxi From scoder at codespeak.net Wed Oct 25 
19:40:22 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Wed, 25 Oct 2006 19:40:22 +0200 (CEST) Subject: [Lxml-checkins] r33733 - lxml/trunk Message-ID: <20061025174022.88C621006C@code0.codespeak.net> Author: scoder Date: Wed Oct 25 19:40:21 2006 New Revision: 33733 Modified: lxml/trunk/setup.py Log: removed --no-local-pyrex option from setup.py again: just delete the pyrex directory instead Modified: lxml/trunk/setup.py ============================================================================== --- lxml/trunk/setup.py (original) +++ lxml/trunk/setup.py Wed Oct 25 19:40:21 2006 @@ -100,10 +100,7 @@ # setup etree extension building -if '--no-local-pyrex' in sys.argv: - sys.argv.remove('--no-local-pyrex') -else: - sys.path.insert(0, os.path.join(src_dir, 'src')) # in case we use a local Pyrex +sys.path.insert(0, os.path.join(src_dir, 'pyrex')) # in case we use a local Pyrex try: from Pyrex.Distutils import build_ext as build_pyx From scoder at codespeak.net Wed Oct 25 19:40:44 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Wed, 25 Oct 2006 19:40:44 +0200 (CEST) Subject: [Lxml-checkins] r33734 - lxml/trunk Message-ID: <20061025174044.0552A1006C@code0.codespeak.net> Author: scoder Date: Wed Oct 25 19:40:42 2006 New Revision: 33734 Modified: lxml/trunk/CHANGES.txt Log: now ships with patched Pyrex Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Wed Oct 25 19:40:42 2006 @@ -8,6 +8,8 @@ Features added -------------- +* Source distribution now ships with a patched Pyrex + * New C-API function makeElement() to create new elements with text, tail, attributes and namespaces From scoder at codespeak.net Wed Oct 25 19:42:21 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Wed, 25 Oct 2006 19:42:21 +0200 (CEST) Subject: [Lxml-checkins] r33735 - lxml/branch/lxml-1.1 Message-ID: 
<20061025174221.AB7111006C@code0.codespeak.net> Author: scoder Date: Wed Oct 25 19:42:20 2006 New Revision: 33735 Modified: lxml/branch/lxml-1.1/CHANGES.txt lxml/branch/lxml-1.1/setup.py Log: build fixes from trunk Modified: lxml/branch/lxml-1.1/CHANGES.txt ============================================================================== --- lxml/branch/lxml-1.1/CHANGES.txt (original) +++ lxml/branch/lxml-1.1/CHANGES.txt Wed Oct 25 19:42:20 2006 @@ -8,6 +8,8 @@ Features added -------------- +* Source distribution now ships with a patched Pyrex + * New C-API function makeElement() to create new elements with text, tail, attributes and namespaces Modified: lxml/branch/lxml-1.1/setup.py ============================================================================== --- lxml/branch/lxml-1.1/setup.py (original) +++ lxml/branch/lxml-1.1/setup.py Wed Oct 25 19:42:20 2006 @@ -100,10 +100,7 @@ # setup etree extension building -if '--no-local-pyrex' in sys.argv: - sys.argv.remove('--no-local-pyrex') -else: - sys.path.insert(0, os.path.join(src_dir, 'src')) # in case we use a local Pyrex +sys.path.insert(0, os.path.join(src_dir, 'pyrex')) # in case we use a local Pyrex try: from Pyrex.Distutils import build_ext as build_pyx From scoder at codespeak.net Wed Oct 25 20:34:03 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Wed, 25 Oct 2006 20:34:03 +0200 (CEST) Subject: [Lxml-checkins] r33736 - lxml/trunk/src/lxml/tests Message-ID: <20061025183403.B5F5A10050@code0.codespeak.net> Author: scoder Date: Wed Oct 25 20:34:01 2006 New Revision: 33736 Modified: lxml/trunk/src/lxml/tests/test_elementtree.py lxml/trunk/src/lxml/tests/test_etree.py Log: moved Comment deepcopy test into test_etree.py to work around bugs in ET Modified: lxml/trunk/src/lxml/tests/test_elementtree.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_elementtree.py (original) +++ lxml/trunk/src/lxml/tests/test_elementtree.py Wed Oct 
25 20:34:01 2006 @@ -2242,17 +2242,6 @@ self.assertEquals('', tostring(b).replace(' ', '')) - def test_deepcopy_comment(self): - # previously caused a crash - Comment = self.etree.Comment - - a = Comment("ONE") - b = copy.deepcopy(a) - b.text = "ANOTHER" - - self.assertEquals('ONE', a.text) - self.assertEquals('ANOTHER', b.text) - def test_shallowcopy(self): Element = self.etree.Element Modified: lxml/trunk/src/lxml/tests/test_etree.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_etree.py (original) +++ lxml/trunk/src/lxml/tests/test_etree.py Wed Oct 25 20:34:01 2006 @@ -93,6 +93,18 @@ self.assertEquals('ONE', a.text) self.assertEquals('ANOTHER', b.text) + def test_deepcopy_comment(self): + # previously caused a crash + # not supported by ET! + Comment = self.etree.Comment + + a = Comment("ONE") + b = copy.deepcopy(a) + b.text = "ANOTHER" + + self.assertEquals('ONE', a.text) + self.assertEquals('ANOTHER', b.text) + def test_attribute_set(self): # ElementTree accepts arbitrary attribute values # lxml.etree allows only strings From scoder at codespeak.net Wed Oct 25 20:35:02 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Wed, 25 Oct 2006 20:35:02 +0200 (CEST) Subject: [Lxml-checkins] r33737 - lxml/branch/lxml-1.1/src/lxml/tests Message-ID: <20061025183502.DEB1810050@code0.codespeak.net> Author: scoder Date: Wed Oct 25 20:35:01 2006 New Revision: 33737 Modified: lxml/branch/lxml-1.1/src/lxml/tests/test_elementtree.py lxml/branch/lxml-1.1/src/lxml/tests/test_etree.py Log: moved Comment deepcopy test into test_etree.py to work around bugs in ET Modified: lxml/branch/lxml-1.1/src/lxml/tests/test_elementtree.py ============================================================================== --- lxml/branch/lxml-1.1/src/lxml/tests/test_elementtree.py (original) +++ lxml/branch/lxml-1.1/src/lxml/tests/test_elementtree.py Wed Oct 25 20:35:01 2006 @@ -2242,17 +2242,6 @@ 
self.assertEquals('', tostring(b).replace(' ', '')) - def test_deepcopy_comment(self): - # previously caused a crash - Comment = self.etree.Comment - - a = Comment("ONE") - b = copy.deepcopy(a) - b.text = "ANOTHER" - - self.assertEquals('ONE', a.text) - self.assertEquals('ANOTHER', b.text) - def test_shallowcopy(self): Element = self.etree.Element Modified: lxml/branch/lxml-1.1/src/lxml/tests/test_etree.py ============================================================================== --- lxml/branch/lxml-1.1/src/lxml/tests/test_etree.py (original) +++ lxml/branch/lxml-1.1/src/lxml/tests/test_etree.py Wed Oct 25 20:35:01 2006 @@ -93,6 +93,18 @@ self.assertEquals('ONE', a.text) self.assertEquals('ANOTHER', b.text) + def test_deepcopy_comment(self): + # previously caused a crash + # not supported by ET! + Comment = self.etree.Comment + + a = Comment("ONE") + b = copy.deepcopy(a) + b.text = "ANOTHER" + + self.assertEquals('ONE', a.text) + self.assertEquals('ANOTHER', b.text) + def test_attribute_set(self): # ElementTree accepts arbitrary attribute values # lxml.etree allows only strings From scoder at codespeak.net Thu Oct 26 08:53:52 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Thu, 26 Oct 2006 08:53:52 +0200 (CEST) Subject: [Lxml-checkins] r33746 - lxml/trunk Message-ID: <20061026065352.43C5F10053@code0.codespeak.net> Author: scoder Date: Thu Oct 26 08:53:49 2006 New Revision: 33746 Modified: lxml/trunk/MANIFEST.in Log: include Pyrex in source distro Modified: lxml/trunk/MANIFEST.in ============================================================================== --- lxml/trunk/MANIFEST.in (original) +++ lxml/trunk/MANIFEST.in Thu Oct 26 08:53:49 2006 @@ -8,5 +8,6 @@ recursive-include src/lxml/tests *.rng *.xslt *.xml *.dtd recursive-include benchmark *.py recursive-include doc *.txt *.html *.css *.xml *.mgp pubkey.asc +recursive-include pyrex/Pyrex *.py include doc/mkhtml.py doc/rest2html.py exclude doc/pyrex.txt src/lxml/etree.pxi From scoder at 
codespeak.net Thu Oct 26 08:54:52 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Thu, 26 Oct 2006 08:54:52 +0200 (CEST) Subject: [Lxml-checkins] r33747 - lxml/trunk/benchmark Message-ID: <20061026065452.D49EE10053@code0.codespeak.net> Author: scoder Date: Thu Oct 26 08:54:50 2006 New Revision: 33747 Added: lxml/trunk/benchmark/bench_objectify.py Modified: lxml/trunk/benchmark/benchbase.py Log: cleanup in benchmarks, new objectify benchmark class Added: lxml/trunk/benchmark/bench_objectify.py ============================================================================== --- (empty file) +++ lxml/trunk/benchmark/bench_objectify.py Thu Oct 26 08:54:50 2006 @@ -0,0 +1,60 @@ +import sys, copy +from itertools import * +from StringIO import StringIO + +from lxml import etree, objectify + +parser = etree.XMLParser(remove_blank_text=True) +lookup = etree.ElementNamespaceClassLookup(objectify.ObjectifyElementClassLookup()) +parser.setElementClassLookup(lookup) + +import benchbase +from benchbase import with_attributes, with_text, onlylib, serialized + +############################################################ +# Benchmarks +############################################################ + +class BenchMark(benchbase.BenchMarkBase): + def __init__(self, lib): + benchbase.BenchMarkBase.__init__(self, lib, parser) + + def bench_attributes(self, root): + "1 2 4" + for i in repeat(None, 3000): + root.zzzzz + + def bench_attributes_deep(self, root): + "1 2 4" + for i in repeat(None, 3000): + root.zzzzz['{cdefg}z00000'] + + def bench_attributes_deep_cached(self, root): + "1 2 4" + cache1 = root.zzzzz + cache2 = cache1['{cdefg}z00000'] + for i in repeat(None, 3000): + root.zzzzz['{cdefg}z00000'] + + def bench_objectpath(self, root): + "1 2 4" + path = objectify.ObjectPath(".zzzzz") + for i in repeat(None, 3000): + path(root) + + def bench_objectpath_deep(self, root): + "1 2 4" + path = objectify.ObjectPath(".zzzzz.{cdefg}z00000") + for i in repeat(None, 3000): + 
path(root) + + def bench_objectpath_deep_cached(self, root): + "1 2 4" + cache1 = root.zzzzz + cache2 = cache1['{cdefg}z00000'] + path = objectify.ObjectPath(".zzzzz.{cdefg}z00000") + for i in repeat(None, 3000): + path(root) + +if __name__ == '__main__': + benchbase.main(BenchMark) Modified: lxml/trunk/benchmark/benchbase.py ============================================================================== --- lxml/trunk/benchmark/benchbase.py (original) +++ l