From scoder at codespeak.net Tue Sep 2 22:10:00 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Tue, 2 Sep 2008 22:10:00 +0200 (CEST) Subject: [Lxml-checkins] r57776 - in lxml/branch/lxml-2.1: . src/lxml src/lxml/tests Message-ID: <20080902201000.DB553169F97@codespeak.net> Author: scoder Date: Tue Sep 2 22:09:53 2008 New Revision: 57776 Modified: lxml/branch/lxml-2.1/CHANGES.txt lxml/branch/lxml-2.1/src/lxml/parsertarget.pxi lxml/branch/lxml-2.1/src/lxml/tests/test_etree.py Log: let target parser honour recover option Modified: lxml/branch/lxml-2.1/CHANGES.txt ============================================================================== --- lxml/branch/lxml-2.1/CHANGES.txt (original) +++ lxml/branch/lxml-2.1/CHANGES.txt Tue Sep 2 22:09:53 2008 @@ -16,6 +16,9 @@ Bugs fixed ---------- +* Target parser did not honour the ``recover`` option and raised an + exception instead of calling ``.close()`` on the target. + Other changes ------------- Modified: lxml/branch/lxml-2.1/src/lxml/parsertarget.pxi ============================================================================== --- lxml/branch/lxml-2.1/src/lxml/parsertarget.pxi (original) +++ lxml/branch/lxml-2.1/src/lxml/parsertarget.pxi Tue Sep 2 22:09:53 2008 @@ -110,13 +110,17 @@ cdef object _handleParseResult(self, _BaseParser parser, xmlDoc* result, filename): - if not self._c_ctxt.wellFormed: + cdef bint recover + recover = parser._parse_options & xmlparser.XML_PARSE_RECOVER + if not self._c_ctxt.wellFormed and not recover: _raiseParseError(self._c_ctxt, filename, self._error_log) self._raise_if_stored() return self._python_target.close() cdef xmlDoc* _handleParseResultDoc(self, _BaseParser parser, xmlDoc* result, filename) except NULL: + cdef bint recover + recover = parser._parse_options & xmlparser.XML_PARSE_RECOVER if result is not NULL and result._private is NULL: # no _Document proxy => orphen tree.xmlFreeDoc(result) @@ -126,7 +130,7 @@ # no _Document proxy => orphen tree.xmlFreeDoc(self._c_ctxt.myDoc) self._c_ctxt.myDoc = NULL - if not self._c_ctxt.wellFormed: + if not self._c_ctxt.wellFormed and not recover: _raiseParseError(self._c_ctxt, filename, self._error_log) self._raise_if_stored() raise _TargetParserResult(self._python_target.close()) Modified: lxml/branch/lxml-2.1/src/lxml/tests/test_etree.py ============================================================================== --- lxml/branch/lxml-2.1/src/lxml/tests/test_etree.py (original) +++ lxml/branch/lxml-2.1/src/lxml/tests/test_etree.py Tue Sep 2 22:09:53 2008 @@ -525,6 +525,31 @@ "data-ca", "end-a", "data-B", "end-root"], events) + def test_parser_target_recover(self): + events = [] + class Target(object): + def start(self, tag, attrib): + events.append("start-" + tag) + def end(self, tag): + events.append("end-" + tag) + def data(self, data): + events.append("data-" + data) + def close(self): + events.append("close") + return "DONE" + + parser = self.etree.XMLParser(target=Target(), + recover=True) + + parser.feed(_bytes('AcaB')) + done = parser.close() + + self.assertEquals("DONE", done) + self.assertEquals(["start-root", "data-A", "start-a", + "data-ca", "end-a", "data-B", + "end-root", "close"], + events) + def test_iterwalk_tag(self): iterwalk = self.etree.iterwalk root = self.etree.XML(_bytes('')) From scoder at codespeak.net Tue Sep 2 22:17:03 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Tue, 2 Sep 2008 22:17:03 +0200 (CEST) Subject: [Lxml-checkins] r57778 - in lxml/branch/lxml-2.0: . src/lxml src/lxml/tests Message-ID: <20080902201703.89A67169F97@codespeak.net> Author: scoder Date: Tue Sep 2 22:16:59 2008 New Revision: 57778 Modified: lxml/branch/lxml-2.0/CHANGES.txt lxml/branch/lxml-2.0/src/lxml/parsertarget.pxi lxml/branch/lxml-2.0/src/lxml/tests/test_etree.py Log: let target parser honour recover option Modified: lxml/branch/lxml-2.0/CHANGES.txt ============================================================================== --- lxml/branch/lxml-2.0/CHANGES.txt (original) +++ lxml/branch/lxml-2.0/CHANGES.txt Tue Sep 2 22:16:59 2008 @@ -2,6 +2,22 @@ lxml changelog ============== +Under development +================= + +Features added +-------------- + +Bugs fixed +---------- + +* Target parser did not honour the ``recover`` option and raised an + exception instead of calling ``.close()`` on the target. + +Other changes +------------- + + 2.0.8 (2008-07-24) ================== Modified: lxml/branch/lxml-2.0/src/lxml/parsertarget.pxi ============================================================================== --- lxml/branch/lxml-2.0/src/lxml/parsertarget.pxi (original) +++ lxml/branch/lxml-2.0/src/lxml/parsertarget.pxi Tue Sep 2 22:16:59 2008 @@ -110,13 +110,17 @@ cdef object _handleParseResult(self, _BaseParser parser, xmlDoc* result, filename): - if not self._c_ctxt.wellFormed: + cdef bint recover + recover = parser._parse_options & xmlparser.XML_PARSE_RECOVER + if not self._c_ctxt.wellFormed and not recover: _raiseParseError(self._c_ctxt, filename, self._error_log) self._raise_if_stored() return self._python_target.close() cdef xmlDoc* _handleParseResultDoc(self, _BaseParser parser, xmlDoc* result, filename) except NULL: + cdef bint recover + recover = parser._parse_options & xmlparser.XML_PARSE_RECOVER if result is not NULL and result._private is NULL: # no _Document proxy => orphen tree.xmlFreeDoc(result) @@ -126,7 +130,7 @@ # no _Document proxy => orphen tree.xmlFreeDoc(self._c_ctxt.myDoc) self._c_ctxt.myDoc = NULL - if not self._c_ctxt.wellFormed: + if not self._c_ctxt.wellFormed and not recover: _raiseParseError(self._c_ctxt, filename, self._error_log) self._raise_if_stored() raise _TargetParserResult(self._python_target.close()) Modified: lxml/branch/lxml-2.0/src/lxml/tests/test_etree.py ============================================================================== --- lxml/branch/lxml-2.0/src/lxml/tests/test_etree.py (original) +++ lxml/branch/lxml-2.0/src/lxml/tests/test_etree.py Tue Sep 2 22:16:59 2008 @@ -439,6 +439,31 @@ tree.parse, StringIO(""), parser=parser) self.assertEquals(["start", "end"], events) + def test_parser_target_recover(self): + events = [] + class Target(object): + def start(self, tag, attrib): + events.append("start-" + tag) + def end(self, tag): + events.append("end-" + tag) + def data(self, data): + events.append("data-" + data) + def close(self): + events.append("close") + return "DONE" + + parser = self.etree.XMLParser(target=Target(), + recover=True) + + parser.feed('AcaB') + done = parser.close() + + self.assertEquals("DONE", done) + self.assertEquals(["start-root", "data-A", "start-a", + "data-ca", "end-a", "data-B", + "end-root", "close"], + events) + def test_iterwalk_tag(self): iterwalk = self.etree.iterwalk root = self.etree.XML('') From scoder at codespeak.net Tue Sep 2 22:18:55 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Tue, 2 Sep 2008 22:18:55 +0200 (CEST) Subject: [Lxml-checkins] r57779 - in lxml/trunk: . src/lxml Message-ID: <20080902201855.9228C169F95@codespeak.net> Author: scoder Date: Tue Sep 2 22:18:46 2008 New Revision: 57779 Modified: lxml/trunk/ (props changed) lxml/trunk/src/lxml/lxml.etree.pyx Log: r4754 at delle: sbehnel | 2008-09-01 15:51:55 +0200 small fixes Modified: lxml/trunk/src/lxml/lxml.etree.pyx ============================================================================== --- lxml/trunk/src/lxml/lxml.etree.pyx (original) +++ lxml/trunk/src/lxml/lxml.etree.pyx Tue Sep 2 22:18:46 2008 @@ -38,7 +38,7 @@ cdef object BytesIO, StringIO try: from io import BytesIO, StringIO -except ImportError: +except (ImportError, AttributeError): from StringIO import StringIO, StringIO as BytesIO cdef object _elementpath @@ -103,7 +103,7 @@ # xml schema "http://www.w3.org/2001/XMLSchema": "xs", "http://www.w3.org/2001/XMLSchema-instance": "xsi", - # dublic core + # dublin core "http://purl.org/dc/elements/1.1/": "dc", # objectify "http://codespeak.net/lxml/objectify/pytype" : "py", From scoder at codespeak.net Tue Sep 2 22:19:01 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Tue, 2 Sep 2008 22:19:01 +0200 (CEST) Subject: [Lxml-checkins] r57780 - in lxml/trunk: . src/lxml/tests Message-ID: <20080902201901.283B2169F95@codespeak.net> Author: scoder Date: Tue Sep 2 22:18:59 2008 New Revision: 57780 Modified: lxml/trunk/ (props changed) lxml/trunk/src/lxml/tests/test_xpathevaluator.py Log: r4755 at delle: sbehnel | 2008-09-01 15:52:26 +0200 disabled broken test that uses non-ASCII characters in URIs Modified: lxml/trunk/src/lxml/tests/test_xpathevaluator.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_xpathevaluator.py (original) +++ lxml/trunk/src/lxml/tests/test_xpathevaluator.py Tue Sep 2 22:18:59 2008 @@ -555,19 +555,21 @@ self.assertEquals(1, len(r)) self.assertEquals('{nsb}b', r[0].tag) - def test_xpath_compile_unicode(self): - x = self.parse(_bytes('' + # disabled this test as non-ASCII characters in namespace URIs are + # not acceptable + def _test_xpath_compile_unicode(self): + x = self.parse(_bytes('' ).decode("unicode_escape")) - expr = etree.ETXPath(_bytes("/a/{nsa\\uf8d2}b").decode("unicode_escape")) + expr = etree.ETXPath(_bytes("/a/{http://nsa/\\uf8d2}b").decode("unicode_escape")) r = expr(x) self.assertEquals(1, len(r)) - self.assertEquals(_bytes('{nsa\\uf8d2}b').decode("unicode_escape"), r[0].tag) + self.assertEquals(_bytes('{http://nsa/\\uf8d2}b').decode("unicode_escape"), r[0].tag) - expr = etree.ETXPath(_bytes("/a/{nsb\\uf8d1}b").decode("unicode_escape")) + expr = etree.ETXPath(_bytes("/a/{http://nsb/\\uf8d1}b").decode("unicode_escape")) r = expr(x) self.assertEquals(1, len(r)) - self.assertEquals(_bytes('{nsb\\uf8d1}b').decode("unicode_escape"), r[0].tag) + self.assertEquals(_bytes('{http://nsb/\\uf8d1}b').decode("unicode_escape"), r[0].tag) SAMPLE_XML = etree.parse(BytesIO(""" From scoder at codespeak.net Tue Sep 2 22:19:15 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Tue, 2 Sep 2008 22:19:15 +0200 (CEST) Subject: [Lxml-checkins] r57781 - in lxml/trunk: . src/lxml src/lxml/tests Message-ID: <20080902201915.B39A9169F95@codespeak.net> Author: scoder Date: Tue Sep 2 22:19:08 2008 New Revision: 57781 Modified: lxml/trunk/ (props changed) lxml/trunk/CHANGES.txt lxml/trunk/src/lxml/parsertarget.pxi lxml/trunk/src/lxml/tests/test_etree.py Log: r4759 at delle: sbehnel | 2008-09-02 22:07:11 +0200 let target parser honour recover option Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Tue Sep 2 22:19:08 2008 @@ -20,6 +20,9 @@ Bugs fixed ---------- +* Target parser did not honour the ``recover`` option and raised an + exception instead of calling ``.close()`` on the target. + Other changes ------------- Modified: lxml/trunk/src/lxml/parsertarget.pxi ============================================================================== --- lxml/trunk/src/lxml/parsertarget.pxi (original) +++ lxml/trunk/src/lxml/parsertarget.pxi Tue Sep 2 22:19:08 2008 @@ -110,13 +110,17 @@ cdef object _handleParseResult(self, _BaseParser parser, xmlDoc* result, filename): - if not self._c_ctxt.wellFormed: + cdef bint recover + recover = parser._parse_options & xmlparser.XML_PARSE_RECOVER + if not self._c_ctxt.wellFormed and not recover: _raiseParseError(self._c_ctxt, filename, self._error_log) self._raise_if_stored() return self._python_target.close() cdef xmlDoc* _handleParseResultDoc(self, _BaseParser parser, xmlDoc* result, filename) except NULL: + cdef bint recover + recover = parser._parse_options & xmlparser.XML_PARSE_RECOVER if result is not NULL and result._private is NULL: # no _Document proxy => orphen tree.xmlFreeDoc(result) @@ -126,7 +130,7 @@ # no _Document proxy => orphen tree.xmlFreeDoc(self._c_ctxt.myDoc) self._c_ctxt.myDoc = NULL - if not self._c_ctxt.wellFormed: + if not self._c_ctxt.wellFormed and not recover: _raiseParseError(self._c_ctxt, filename, self._error_log) self._raise_if_stored() raise _TargetParserResult(self._python_target.close()) Modified: lxml/trunk/src/lxml/tests/test_etree.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_etree.py (original) +++ lxml/trunk/src/lxml/tests/test_etree.py Tue Sep 2 22:19:08 2008 @@ -525,6 +525,31 @@ "data-ca", "end-a", "data-B", "end-root"], events) + def test_parser_target_recover(self): + events = [] + class Target(object): + def start(self, tag, attrib): + events.append("start-" + tag) + def end(self, tag): + events.append("end-" + tag) + def data(self, data): + events.append("data-" + data) + def close(self): + events.append("close") + return "DONE" + + parser = self.etree.XMLParser(target=Target(), + recover=True) + + parser.feed(_bytes('AcaB')) + done = parser.close() + + self.assertEquals("DONE", done) + self.assertEquals(["start-root", "data-A", "start-a", + "data-ca", "end-a", "data-B", + "end-root", "close"], + events) + def test_iterwalk_tag(self): iterwalk = self.etree.iterwalk root = self.etree.XML(_bytes('')) From scoder at codespeak.net Fri Sep 5 13:50:01 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 5 Sep 2008 13:50:01 +0200 (CEST) Subject: [Lxml-checkins] r57837 - in lxml/branch/lxml-2.1: . src/lxml Message-ID: <20080905115001.21531169F13@codespeak.net> Author: scoder Date: Fri Sep 5 13:50:00 2008 New Revision: 57837 Modified: lxml/branch/lxml-2.1/CHANGES.txt lxml/branch/lxml-2.1/src/lxml/proxy.pxi Log: fix another threading-related memory problem Modified: lxml/branch/lxml-2.1/CHANGES.txt ============================================================================== --- lxml/branch/lxml-2.1/CHANGES.txt (original) +++ lxml/branch/lxml-2.1/CHANGES.txt Fri Sep 5 13:50:00 2008 @@ -16,6 +16,8 @@ Bugs fixed ---------- +* Memory problem when passing documents between threads. + * Target parser did not honour the ``recover`` option and raised an exception instead of calling ``.close()`` on the target. Modified: lxml/branch/lxml-2.1/src/lxml/proxy.pxi ============================================================================== --- lxml/branch/lxml-2.1/src/lxml/proxy.pxi (original) +++ lxml/branch/lxml-2.1/src/lxml/proxy.pxi Fri Sep 5 13:50:00 2008 @@ -46,12 +46,12 @@ python.Py_XDECREF(proxy._gc_doc) proxy._gc_doc = NULL -cdef inline void _updateProxyDocument(xmlNode* c_node, _Document doc): +cdef inline _Document _updateProxyDocument(xmlNode* c_node, _Document doc): u"""Replace the document reference of a proxy. This may deallocate the original document of the proxy! """ - cdef _Document old_doc + cdef _Document old_doc = None cdef _Element element = <_Element>c_node._private if element._doc is not doc: old_doc = element._doc @@ -59,6 +59,7 @@ python.Py_INCREF(doc) element._gc_doc = doc python.Py_DECREF(old_doc) + return old_doc ################################################################################ # temporarily make a node the root node of its document @@ -315,6 +316,7 @@ cdef xmlNs* c_nsdef cdef xmlNs* c_del_ns_list cdef cstd.size_t i + cdef list old_docs = [] if not tree._isElementOrXInclude(c_element): return 0 @@ -365,9 +367,11 @@ if c_node is NULL: # no children => back off and continue with siblings and parents - # 4) fix _Document reference (may dealloc the original document!) + # 4) fix _Document reference if c_element._private is not NULL: - _updateProxyDocument(c_element, doc) + old_doc = _updateProxyDocument(c_element, doc) + if old_doc not in old_docs: + old_docs.append(old_doc) if c_element is c_start_node: break # all done @@ -383,9 +387,11 @@ if c_element is NULL or not tree._isElementOrXInclude(c_element): break - # 4) fix _Document reference (may dealloc the original document!) + # 4) fix _Document reference if c_element._private is not NULL: - _updateProxyDocument(c_element, doc) + old_doc = _updateProxyDocument(c_element, doc) + if old_doc not in old_docs: + old_docs.append(old_doc) if c_element is c_start_node: break @@ -402,6 +408,9 @@ if doc._c_doc.dict is not c_source_doc.dict: fixThreadDictNames(c_start_node, c_source_doc.dict, doc._c_doc.dict) + # *now* free all _Documents the original tree referred to + old_docs = None + # free now unused namespace declarations if c_del_ns_list is not NULL: tree.xmlFreeNsList(c_del_ns_list) From scoder at codespeak.net Fri Sep 5 13:50:10 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 5 Sep 2008 13:50:10 +0200 (CEST) Subject: [Lxml-checkins] r57838 - in lxml/branch/lxml-2.0: . src/lxml Message-ID: <20080905115010.A02A9169F15@codespeak.net> Author: scoder Date: Fri Sep 5 13:50:09 2008 New Revision: 57838 Modified: lxml/branch/lxml-2.0/CHANGES.txt lxml/branch/lxml-2.0/src/lxml/proxy.pxi Log: fix another threading-related memory problem Modified: lxml/branch/lxml-2.0/CHANGES.txt ============================================================================== --- lxml/branch/lxml-2.0/CHANGES.txt (original) +++ lxml/branch/lxml-2.0/CHANGES.txt Fri Sep 5 13:50:09 2008 @@ -11,6 +11,8 @@ Bugs fixed ---------- +* Memory problem when passing documents between threads. + * Target parser did not honour the ``recover`` option and raised an exception instead of calling ``.close()`` on the target. Modified: lxml/branch/lxml-2.0/src/lxml/proxy.pxi ============================================================================== --- lxml/branch/lxml-2.0/src/lxml/proxy.pxi (original) +++ lxml/branch/lxml-2.0/src/lxml/proxy.pxi Fri Sep 5 13:50:09 2008 @@ -46,12 +46,12 @@ python.Py_XDECREF(proxy._gc_doc) proxy._gc_doc = NULL -cdef inline void _updateProxyDocument(xmlNode* c_node, _Document doc): +cdef inline _Document _updateProxyDocument(xmlNode* c_node, _Document doc): u"""Replace the document reference of a proxy. This may deallocate the original document of the proxy! """ - cdef _Document old_doc + cdef _Document old_doc = None cdef _Element element = <_Element>c_node._private if element._doc is not doc: old_doc = element._doc @@ -59,6 +59,7 @@ python.Py_INCREF(doc) element._gc_doc = doc python.Py_DECREF(old_doc) + return old_doc ################################################################################ # temporarily make a node the root node of its document @@ -315,6 +316,7 @@ cdef xmlNs* c_nsdef cdef xmlNs* c_del_ns_list cdef cstd.size_t i + cdef list old_docs = [] if not tree._isElementOrXInclude(c_element): return 0 @@ -365,9 +367,11 @@ if c_node is NULL: # no children => back off and continue with siblings and parents - # 4) fix _Document reference (may dealloc the original document!) + # 4) fix _Document reference if c_element._private is not NULL: - _updateProxyDocument(c_element, doc) + old_doc = _updateProxyDocument(c_element, doc) + if old_doc not in old_docs: + old_docs.append(old_doc) if c_element is c_start_node: break # all done @@ -383,9 +387,11 @@ if c_element is NULL or not tree._isElementOrXInclude(c_element): break - # 4) fix _Document reference (may dealloc the original document!) + # 4) fix _Document reference if c_element._private is not NULL: - _updateProxyDocument(c_element, doc) + old_doc = _updateProxyDocument(c_element, doc) + if old_doc not in old_docs: + old_docs.append(old_doc) if c_element is c_start_node: break @@ -402,6 +408,9 @@ if doc._c_doc.dict is not c_source_doc.dict: fixThreadDictNames(c_start_node, c_source_doc.dict, doc._c_doc.dict) + # *now* free all _Documents the original tree referred to + old_docs = None + # free now unused namespace declarations if c_del_ns_list is not NULL: tree.xmlFreeNsList(c_del_ns_list) From scoder at codespeak.net Fri Sep 5 13:50:18 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 5 Sep 2008 13:50:18 +0200 (CEST) Subject: [Lxml-checkins] r57839 - in lxml/trunk: . src/lxml Message-ID: <20080905115018.0B017169F1D@codespeak.net> Author: scoder Date: Fri Sep 5 13:50:17 2008 New Revision: 57839 Modified: lxml/trunk/ (props changed) lxml/trunk/CHANGES.txt lxml/trunk/src/lxml/proxy.pxi Log: r4765 at delle: sbehnel | 2008-09-05 13:47:15 +0200 fix another threading-related memory problem Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Fri Sep 5 13:50:17 2008 @@ -20,6 +20,8 @@ Bugs fixed ---------- +* Memory problem when passing documents between threads. + * Target parser did not honour the ``recover`` option and raised an exception instead of calling ``.close()`` on the target. Modified: lxml/trunk/src/lxml/proxy.pxi ============================================================================== --- lxml/trunk/src/lxml/proxy.pxi (original) +++ lxml/trunk/src/lxml/proxy.pxi Fri Sep 5 13:50:17 2008 @@ -44,12 +44,12 @@ python.Py_XDECREF(proxy._gc_doc) proxy._gc_doc = NULL -cdef inline void _updateProxyDocument(xmlNode* c_node, _Document doc): +cdef inline _Document _updateProxyDocument(xmlNode* c_node, _Document doc): u"""Replace the document reference of a proxy. This may deallocate the original document of the proxy! """ - cdef _Document old_doc + cdef _Document old_doc = None cdef _Element element = <_Element>c_node._private if element._doc is not doc: old_doc = element._doc @@ -57,6 +57,7 @@ python.Py_INCREF(doc) element._gc_doc = doc python.Py_DECREF(old_doc) + return old_doc ################################################################################ # temporarily make a node the root node of its document @@ -313,6 +314,7 @@ cdef xmlNs* c_nsdef cdef xmlNs* c_del_ns_list cdef cstd.size_t i + cdef list old_docs = [] if not tree._isElementOrXInclude(c_element): return 0 @@ -363,9 +365,11 @@ if c_node is NULL: # no children => back off and continue with siblings and parents - # 4) fix _Document reference (may dealloc the original document!) + # 4) fix _Document reference if c_element._private is not NULL: - _updateProxyDocument(c_element, doc) + old_doc = _updateProxyDocument(c_element, doc) + if old_doc not in old_docs: + old_docs.append(old_doc) if c_element is c_start_node: break # all done @@ -381,9 +385,11 @@ if c_element is NULL or not tree._isElementOrXInclude(c_element): break - # 4) fix _Document reference (may dealloc the original document!) + # 4) fix _Document reference if c_element._private is not NULL: - _updateProxyDocument(c_element, doc) + old_doc = _updateProxyDocument(c_element, doc) + if old_doc not in old_docs: + old_docs.append(old_doc) if c_element is c_start_node: break @@ -400,6 +406,9 @@ if doc._c_doc.dict is not c_source_doc.dict: fixThreadDictNames(c_start_node, c_source_doc.dict, doc._c_doc.dict) + # *now* free all _Documents the original tree referred to + old_docs = None + # free now unused namespace declarations if c_del_ns_list is not NULL: tree.xmlFreeNsList(c_del_ns_list) From scoder at codespeak.net Fri Sep 5 14:31:42 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 5 Sep 2008 14:31:42 +0200 (CEST) Subject: [Lxml-checkins] r57845 - in lxml/branch/lxml-2.1: . doc Message-ID: <20080905123142.76A4F16A0AD@codespeak.net> Author: scoder Date: Fri Sep 5 14:31:38 2008 New Revision: 57845 Modified: lxml/branch/lxml-2.1/CHANGES.txt lxml/branch/lxml-2.1/doc/main.txt lxml/branch/lxml-2.1/version.txt Log: release version 2.1.2 Modified: lxml/branch/lxml-2.1/CHANGES.txt ============================================================================== --- lxml/branch/lxml-2.1/CHANGES.txt (original) +++ lxml/branch/lxml-2.1/CHANGES.txt Fri Sep 5 14:31:38 2008 @@ -2,8 +2,8 @@ lxml changelog ============== -Under development -================= +2.1.2 (2008-09-05) +================== Features added -------------- Modified: lxml/branch/lxml-2.1/doc/main.txt ============================================================================== --- lxml/branch/lxml-2.1/doc/main.txt (original) +++ lxml/branch/lxml-2.1/doc/main.txt Fri Sep 5 14:31:38 2008 @@ -147,8 +147,8 @@ source release. If you can't wait, consider trying a less recent release version first. -The latest version is `lxml 2.1.1`_, released 2008-07-24 -(`changes for 2.1.1`_). `Older versions`_ are listed below. +The latest version is `lxml 2.1.2`_, released 2008-09-05 +(`changes for 2.1.2`_). `Older versions`_ are listed below. Please take a look at the `installation instructions`_! @@ -220,7 +220,9 @@ `2.0 `_ and the `current in-development version `_. -.. _`PDF documentation`: lxmldoc-2.1.1.pdf +.. _`PDF documentation`: lxmldoc-2.1.2.pdf + +* `lxml 2.1.1`_, released 2008-07-24 (`changes for 2.1.1`_) * `lxml 2.1`_, released 2008-07-09 (`changes for 2.1`_) @@ -300,6 +302,7 @@ * `lxml 0.5`_, released 2005-04-08 +.. _`lxml 2.1.2`: lxml-2.1.2.tgz .. _`lxml 2.1.1`: lxml-2.1.1.tgz .. _`lxml 2.1`: lxml-2.1.tgz .. _`lxml 2.1beta3`: lxml-2.1beta3.tgz @@ -340,6 +343,7 @@ .. _`lxml 0.5.1`: lxml-0.5.1.tgz .. _`lxml 0.5`: lxml-0.5.tgz +.. _`changes for 2.1.2`: changes-2.1.2.html .. _`changes for 2.1.1`: changes-2.1.1.html .. _`changes for 2.1`: changes-2.1.html .. _`changes for 2.1beta3`: changes-2.1beta3.html Modified: lxml/branch/lxml-2.1/version.txt ============================================================================== --- lxml/branch/lxml-2.1/version.txt (original) +++ lxml/branch/lxml-2.1/version.txt Fri Sep 5 14:31:38 2008 @@ -1 +1 @@ -2.1.1 +2.1.2 From scoder at codespeak.net Fri Sep 5 14:32:21 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 5 Sep 2008 14:32:21 +0200 (CEST) Subject: [Lxml-checkins] r57846 - lxml/tag/lxml-2.1.2 Message-ID: <20080905123221.4251916A099@codespeak.net> Author: scoder Date: Fri Sep 5 14:32:19 2008 New Revision: 57846 Added: lxml/tag/lxml-2.1.2/ - copied from r57845, lxml/branch/lxml-2.1/ Log: tag for 2.1.2 From scoder at codespeak.net Fri Sep 5 14:32:40 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 5 Sep 2008 14:32:40 +0200 (CEST) Subject: [Lxml-checkins] r57847 - in lxml/branch/lxml-2.0: . doc Message-ID: <20080905123240.BEA2116A099@codespeak.net> Author: scoder Date: Fri Sep 5 14:32:39 2008 New Revision: 57847 Modified: lxml/branch/lxml-2.0/CHANGES.txt lxml/branch/lxml-2.0/doc/main.txt lxml/branch/lxml-2.0/version.txt Log: release version 2.0.9 Modified: lxml/branch/lxml-2.0/CHANGES.txt ============================================================================== --- lxml/branch/lxml-2.0/CHANGES.txt (original) +++ lxml/branch/lxml-2.0/CHANGES.txt Fri Sep 5 14:32:39 2008 @@ -2,11 +2,8 @@ lxml changelog ============== -Under development -================= - -Features added --------------- +2.0.9 (2008-09-05) +================== Bugs fixed ---------- @@ -16,9 +13,6 @@ * Target parser did not honour the ``recover`` option and raised an exception instead of calling ``.close()`` on the target. -Other changes -------------- - 2.0.8 (2008-07-24) ================== Modified: lxml/branch/lxml-2.0/doc/main.txt ============================================================================== --- lxml/branch/lxml-2.0/doc/main.txt (original) +++ lxml/branch/lxml-2.0/doc/main.txt Fri Sep 5 14:32:39 2008 @@ -146,8 +146,8 @@ source release. If you can't wait, consider trying a less recent release version first. -The latest version is `lxml 2.0.8`_, released 2008-07-24 -(`changes for 2.0.8`_). `Older versions`_ are listed below. +The latest version is `lxml 2.0.9`_, released 2008-09-05 +(`changes for 2.0.9`_). `Older versions`_ are listed below. Please take a look at the `installation instructions`_! @@ -215,7 +215,9 @@ Old Versions ------------ -.. _`PDF documentation`: lxmldoc-2.0.8.pdf +.. _`PDF documentation`: lxmldoc-2.0.9.pdf + +* `lxml 2.0.8`_, released 2008-07-24 (`changes for 2.0.8`_) * `lxml 2.0.7`_, released 2008-06-20 (`changes for 2.0.7`_) @@ -283,6 +285,7 @@ * `lxml 0.5`_, released 2005-04-08 +.. _`lxml 2.0.9`: lxml-2.0.9.tgz .. _`lxml 2.0.8`: lxml-2.0.8.tgz .. _`lxml 2.0.7`: lxml-2.0.7.tgz .. _`lxml 2.0.6`: lxml-2.0.6.tgz @@ -317,6 +320,7 @@ .. _`lxml 0.5.1`: lxml-0.5.1.tgz .. _`lxml 0.5`: lxml-0.5.tgz +.. _`changes for 2.0.9`: changes-2.0.9.html .. _`changes for 2.0.8`: changes-2.0.8.html .. _`changes for 2.0.7`: changes-2.0.7.html .. _`changes for 2.0.6`: changes-2.0.6.html Modified: lxml/branch/lxml-2.0/version.txt ============================================================================== --- lxml/branch/lxml-2.0/version.txt (original) +++ lxml/branch/lxml-2.0/version.txt Fri Sep 5 14:32:39 2008 @@ -1 +1 @@ -2.0.8 +2.0.9 From scoder at codespeak.net Fri Sep 5 14:33:13 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 5 Sep 2008 14:33:13 +0200 (CEST) Subject: [Lxml-checkins] r57848 - lxml/tag/lxml-2.0.9 Message-ID: <20080905123313.4ED4816A099@codespeak.net> Author: scoder Date: Fri Sep 5 14:33:12 2008 New Revision: 57848 Added: lxml/tag/lxml-2.0.9/ - copied from r57847, lxml/branch/lxml-2.0/ Log: tag for 2.0.9 From scoder at codespeak.net Fri Sep 5 14:34:34 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 5 Sep 2008 14:34:34 +0200 (CEST) Subject: [Lxml-checkins] r57849 - lxml/tag/lxml-2.0.8 Message-ID: <20080905123434.57BCB16A09E@codespeak.net> Author: scoder Date: Fri Sep 5 14:34:33 2008 New Revision: 57849 Added: lxml/tag/lxml-2.0.8/ - copied from r56762, lxml/branch/lxml-2.0/ Log: tag for 2.0.8 From scoder at codespeak.net Fri Sep 5 14:35:11 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 5 Sep 2008 14:35:11 +0200 (CEST) Subject: [Lxml-checkins] r57850 - lxml/tag/lxml-2.1.1 Message-ID: <20080905123511.9738316A0A1@codespeak.net> Author: scoder Date: Fri Sep 5 14:35:10 2008 New Revision: 57850 Added: lxml/tag/lxml-2.1.1/ - copied from r56760, lxml/branch/lxml-2.1/ Log: tag for 2.1.1 From scoder at codespeak.net Fri Sep 5 14:36:45 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 5 Sep 2008 14:36:45 +0200 (CEST) Subject: [Lxml-checkins] r57851 - lxml/tag/lxml-2.1 Message-ID: <20080905123645.7397016A0A1@codespeak.net> Author: scoder Date: Fri Sep 5 14:36:44 2008 New Revision: 57851 Added: lxml/tag/lxml-2.1/ - copied from r56392, lxml/trunk/ Log: tag for 2.1 From scoder at codespeak.net Fri Sep 5 14:38:13 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 5 Sep 2008 14:38:13 +0200 (CEST) Subject: [Lxml-checkins] r57852 - lxml/tag/lxml-2.1 Message-ID: <20080905123813.2B23516A05D@codespeak.net> Author: scoder Date: Fri Sep 5 14:38:12 2008 New Revision: 57852 Removed: lxml/tag/lxml-2.1/ Log: wrong tag From scoder at codespeak.net Fri Sep 5 14:38:19 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 5 Sep 2008 14:38:19 +0200 (CEST) Subject: [Lxml-checkins] r57853 - lxml/tag/lxml-2.1 Message-ID: <20080905123819.C572B16A09C@codespeak.net> Author: scoder Date: Fri Sep 5 14:38:18 2008 New Revision: 57853 Added: lxml/tag/lxml-2.1/ - copied from r56386, lxml/trunk/ Log: tag for 2.1 From scoder at codespeak.net Fri Sep 5 14:55:14 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 5 Sep 2008 14:55:14 +0200 (CEST) Subject: [Lxml-checkins] r57855 - lxml/branch/lxml-2.1/doc Message-ID: <20080905125514.F0CEB169EA5@codespeak.net> Author: scoder Date: Fri Sep 5 14:55:14 2008 New Revision: 57855 Modified: lxml/branch/lxml-2.1/doc/main.txt Log: fixed version list in docs Modified: lxml/branch/lxml-2.1/doc/main.txt ============================================================================== --- lxml/branch/lxml-2.1/doc/main.txt (original) +++ lxml/branch/lxml-2.1/doc/main.txt Fri Sep 5 14:55:14 2008 @@ -234,7 +234,9 @@ * `lxml 2.1alpha1`_, released 2008-03-27 (`changes for 2.1alpha1`_) -* `lxml 2.0.8`_, released 2008-07-24 (`changes for 2.0.7`_) +* `lxml 2.0.9`_, released 2008-09-05 (`changes for 2.0.9`_) + +* `lxml 2.0.8`_, released 2008-07-24 (`changes for 2.0.8`_) * `lxml 2.0.7`_, released 2008-06-20 (`changes for 2.0.7`_) @@ -309,6 +311,7 @@ .. _`lxml 2.1beta2`: lxml-2.1beta2.tgz .. _`lxml 2.1beta1`: lxml-2.1beta1.tgz .. _`lxml 2.1alpha1`: lxml-2.1alpha1.tgz +.. _`lxml 2.0.9`: lxml-2.0.9.tgz .. _`lxml 2.0.8`: lxml-2.0.8.tgz .. _`lxml 2.0.7`: lxml-2.0.7.tgz .. _`lxml 2.0.6`: lxml-2.0.6.tgz @@ -350,6 +353,7 @@ .. _`changes for 2.1beta2`: changes-2.1beta2.html .. _`changes for 2.1beta1`: changes-2.1beta1.html .. _`changes for 2.1alpha1`: changes-2.1alpha1.html +.. _`changes for 2.0.9`: changes-2.0.9.html .. _`changes for 2.0.8`: changes-2.0.8.html .. _`changes for 2.0.7`: changes-2.0.7.html .. _`changes for 2.0.6`: changes-2.0.6.html From scoder at codespeak.net Fri Sep 5 15:05:09 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 5 Sep 2008 15:05:09 +0200 (CEST) Subject: [Lxml-checkins] r57857 - lxml/branch/lxml-2.0 Message-ID: <20080905130509.F3F0216A086@codespeak.net> Author: scoder Date: Fri Sep 5 15:05:08 2008 New Revision: 57857 Modified: lxml/branch/lxml-2.0/setup.py Log: declare 2.0.x mature Modified: lxml/branch/lxml-2.0/setup.py ============================================================================== --- lxml/branch/lxml-2.0/setup.py (original) +++ lxml/branch/lxml-2.0/setup.py Fri Sep 5 15:05:08 2008 @@ -88,7 +88,7 @@ """ + branch_link) % { "branch_version" : versioninfo.branch_version() }) + versioninfo.changes()), classifiers = [ - versioninfo.dev_status(), + 'Development Status :: 6 - Mature', # versioninfo.dev_status(), 'Intended Audience :: Developers', 'Intended Audience :: Information Technology', 'License :: OSI Approved :: BSD License', From scoder at codespeak.net Fri Sep 5 21:07:53 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 5 Sep 2008 21:07:53 +0200 (CEST) Subject: [Lxml-checkins] r57866 - in lxml/trunk: . doc Message-ID: <20080905190753.5145F16A104@codespeak.net> Author: scoder Date: Fri Sep 5 21:07:49 2008 New Revision: 57866 Modified: lxml/trunk/ (props changed) lxml/trunk/CHANGES.txt lxml/trunk/doc/main.txt Log: r4769 at delle: sbehnel | 2008-09-05 14:49:27 +0200 post-release updates Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Fri Sep 5 21:07:49 2008 @@ -8,15 +8,28 @@ Features added -------------- +* New options for exclusive C14N and C14N without comments. + +* Instantiating a custom Element classes creates a new Element. + +Bugs fixed +---------- + +Other changes +------------- + + +2.1.2 (2008-09-05) +================== + +Features added +-------------- + * lxml.etree now tries to find the absolute path name of files when parsing from a file-like object. This helps custom resolvers when resolving relative URLs, as lixbml2 can prepend them with the path of the source document. -* New options for exclusive C14N and C14N without comments. - -* Instantiating a custom Element classes creates a new Element. - Bugs fixed ---------- @@ -29,6 +42,18 @@ ------------- +2.0.9 (2008-09-05) +================== + +Bugs fixed +---------- + +* Memory problem when passing documents between threads. + +* Target parser did not honour the ``recover`` option and raised an + exception instead of calling ``.close()`` on the target. + + 2.1.1 (2008-07-24) ================== Modified: lxml/trunk/doc/main.txt ============================================================================== --- lxml/trunk/doc/main.txt (original) +++ lxml/trunk/doc/main.txt Fri Sep 5 21:07:49 2008 @@ -147,8 +147,8 @@ source release. If you can't wait, consider trying a less recent release version first. -The latest version is `lxml 2.1.1`_, released 2008-07-24 -(`changes for 2.1.1`_). `Older versions`_ are listed below. +The latest version is `lxml 2.1.2`_, released 2008-09-05 +(`changes for 2.1.2`_). `Older versions`_ are listed below. Please take a look at the `installation instructions`_! @@ -220,12 +220,16 @@ `2.0 `_ and the `current in-development version `_. -.. _`PDF documentation`: lxmldoc-2.1.1.pdf +.. _`PDF documentation`: lxmldoc-2.1.2.pdf + +* `lxml 2.1.2`_, released 2008-09-05 (`changes for 2.1.2`_) * `lxml 2.1.1`_, released 2008-07-24 (`changes for 2.1.1`_) * `lxml 2.1`_, released 2008-07-09 (`changes for 2.1`_) +* `lxml 2.0.9`_, released 2008-09-05 (`changes for 2.0.9`_) + * `lxml 2.0.8`_, released 2008-07-24 (`changes for 2.0.8`_) * `lxml 2.0.7`_, released 2008-06-20 (`changes for 2.0.7`_) @@ -294,8 +298,10 @@ * `lxml 0.5`_, released 2005-04-08 +.. _`lxml 2.1.2`: lxml-2.1.2.tgz .. _`lxml 2.1.1`: lxml-2.1.1.tgz .. _`lxml 2.1`: lxml-2.1.tgz +.. _`lxml 2.0.9`: lxml-2.0.9.tgz .. _`lxml 2.0.8`: lxml-2.0.8.tgz .. _`lxml 2.0.7`: lxml-2.0.7.tgz .. _`lxml 2.0.6`: lxml-2.0.6.tgz @@ -330,8 +336,10 @@ .. _`lxml 0.5.1`: lxml-0.5.1.tgz .. _`lxml 0.5`: lxml-0.5.tgz +.. _`changes for 2.1.2`: changes-2.1.2.html .. _`changes for 2.1.1`: changes-2.1.1.html .. _`changes for 2.1`: changes-2.1.html +.. _`changes for 2.0.9`: changes-2.0.9.html .. _`changes for 2.0.8`: changes-2.0.8.html .. _`changes for 2.0.7`: changes-2.0.7.html .. _`changes for 2.0.6`: changes-2.0.6.html From scoder at codespeak.net Fri Sep 5 21:08:05 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 5 Sep 2008 21:08:05 +0200 (CEST) Subject: [Lxml-checkins] r57867 - in lxml/trunk: . src/lxml Message-ID: <20080905190805.E63D116A104@codespeak.net> Author: scoder Date: Fri Sep 5 21:08:04 2008 New Revision: 57867 Modified: lxml/trunk/ (props changed) lxml/trunk/src/lxml/proxy.pxi Log: r4770 at delle: sbehnel | 2008-09-05 21:07:40 +0200 some cleanup after last fix: one document reference should be enough Modified: lxml/trunk/src/lxml/proxy.pxi ============================================================================== --- lxml/trunk/src/lxml/proxy.pxi (original) +++ lxml/trunk/src/lxml/proxy.pxi Fri Sep 5 21:08:04 2008 @@ -314,7 +314,6 @@ cdef xmlNs* c_nsdef cdef xmlNs* c_del_ns_list cdef cstd.size_t i - cdef list old_docs = [] if not tree._isElementOrXInclude(c_element): return 0 @@ -368,8 +367,6 @@ # 4) fix _Document reference if c_element._private is not NULL: old_doc = _updateProxyDocument(c_element, doc) - if old_doc not in old_docs: - old_docs.append(old_doc) if c_element is c_start_node: break # all done @@ -388,8 +385,6 @@ # 4) fix _Document reference if c_element._private is not NULL: old_doc = _updateProxyDocument(c_element, doc) - if old_doc not in old_docs: - old_docs.append(old_doc) if c_element is c_start_node: break @@ -406,8 +401,8 @@ if doc._c_doc.dict is not c_source_doc.dict: fixThreadDictNames(c_start_node, c_source_doc.dict, doc._c_doc.dict) - # *now* free all _Documents the original tree referred to - old_docs = None + # *now* allow the original _Document to be deleted + old_doc = None # free now unused namespace declarations if c_del_ns_list is not NULL: From scoder at codespeak.net Sun Sep 7 15:56:08 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sun, 7 Sep 2008 15:56:08 +0200 (CEST) Subject: [Lxml-checkins] r57946 - in lxml/trunk: . src/lxml Message-ID: <20080907135608.0FC7A16A19B@codespeak.net> Author: scoder Date: Sun Sep 7 15:56:07 2008 New Revision: 57946 Modified: lxml/trunk/ (props changed) lxml/trunk/src/lxml/apihelpers.pxi Log: r4784 at delle: sbehnel | 2008-09-07 15:48:36 +0200 code cleanup and minor speed-up Modified: lxml/trunk/src/lxml/apihelpers.pxi ============================================================================== --- lxml/trunk/src/lxml/apihelpers.pxi (original) +++ lxml/trunk/src/lxml/apihelpers.pxi Sun Sep 7 15:56:07 2008 @@ -446,11 +446,11 @@ value = _utf8(value) c_value = _cstr(value) if ns is None: - tree.xmlSetProp(element._c_node, c_tag, c_value) + c_ns = NULL else: c_ns = element._doc._findOrBuildNodeNs(element._c_node, _cstr(ns), NULL) - tree.xmlSetNsProp(element._c_node, c_ns, c_tag, c_value) + tree.xmlSetNsProp(element._c_node, c_ns, c_tag, c_value) return 0 cdef int _delAttribute(_Element element, key) except -1: From scoder at codespeak.net Sun Sep 7 15:56:14 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sun, 7 Sep 2008 15:56:14 +0200 (CEST) Subject: [Lxml-checkins] r57947 - in lxml/trunk: . src/lxml Message-ID: <20080907135614.9F68D16A19F@codespeak.net> Author: scoder Date: Sun Sep 7 15:56:13 2008 New Revision: 57947 Modified: lxml/trunk/ (props changed) lxml/trunk/src/lxml/proxy.pxi Log: r4785 at delle: sbehnel | 2008-09-07 15:55:32 +0200 another restructuring in moveNodeToDocument(): use separate steps for namespaces, dict cleanup and doc references (last\!) Modified: lxml/trunk/src/lxml/proxy.pxi ============================================================================== --- lxml/trunk/src/lxml/proxy.pxi (original) +++ lxml/trunk/src/lxml/proxy.pxi Sun Sep 7 15:56:13 2008 @@ -44,12 +44,11 @@ python.Py_XDECREF(proxy._gc_doc) proxy._gc_doc = NULL -cdef inline _Document _updateProxyDocument(xmlNode* c_node, _Document doc): - u"""Replace the document reference of a proxy. - - This may deallocate the original document of the proxy! +cdef inline void _updateProxyDocument(xmlNode* c_node, _Document doc): + u"""Replace the document reference of a proxy and return the old one + iff it was replaced (None otherwise). """ - cdef _Document old_doc = None + cdef _Document old_doc cdef _Element element = <_Element>c_node._private if element._doc is not doc: old_doc = element._doc @@ -57,7 +56,6 @@ python.Py_INCREF(doc) element._gc_doc = doc python.Py_DECREF(old_doc) - return old_doc ################################################################################ # temporarily make a node the root node of its document @@ -313,7 +311,7 @@ cdef xmlNs* c_ns_next cdef xmlNs* c_nsdef cdef xmlNs* c_del_ns_list - cdef cstd.size_t i + cdef cstd.size_t i, proxy_count = 0 if not tree._isElementOrXInclude(c_element): return 0 @@ -326,7 +324,11 @@ c_ns_cache.size = 0 c_ns_cache.last = 0 - while c_element is not NULL: + tree.BEGIN_FOR_EACH_FROM(c_element, c_element, 1) + if tree._isElementOrXInclude(c_element): + if c_element._private is not NULL: + proxy_count += 1 + # 1) cut out namespaces defined here that are already known by # the ancestors if c_element.nsDef is not NULL: @@ -354,55 +356,7 @@ c_node = c_element.properties else: c_node = c_node.next - - # traverse to next element, start with children - c_node = c_element.children - while c_node is not NULL and \ - not tree._isElementOrXInclude(c_node): - c_node = c_node.next - - if c_node is NULL: - # no children => back off and continue with siblings and parents - - # 4) fix _Document reference - if c_element._private is not NULL: - old_doc = _updateProxyDocument(c_element, doc) - - if c_element is c_start_node: - break # all done - - # continue with siblings - c_node = c_element.next - while (c_node is not NULL and - not tree._isElementOrXInclude(c_node)): - c_node = c_node.next - # if that didn't help, back off through parents' siblings - while c_node is NULL: - c_element = c_element.parent - if c_element is NULL or not tree._isElementOrXInclude(c_element): - break - - # 4) fix _Document reference - if c_element._private is not NULL: - old_doc = _updateProxyDocument(c_element, doc) - - if c_element is c_start_node: - break - # parents already done -> look for their siblings - c_node = c_element.next - while (c_node is not NULL and - not tree._isElementOrXInclude(c_node)): - c_node = c_node.next - if c_node is c_start_node: - break # all done - c_element = c_node - - # 3) fix the names in the tree in case we moved it to a different thread - if doc._c_doc.dict is not c_source_doc.dict: - fixThreadDictNames(c_start_node, c_source_doc.dict, doc._c_doc.dict) - - # *now* allow the original _Document to be deleted - old_doc = None + tree.END_FOR_EACH_FROM(c_element) # free now unused namespace declarations if c_del_ns_list is not NULL: @@ -414,9 +368,31 @@ if c_ns_cache.old is not NULL: cstd.free(c_ns_cache.old) + # 3) fix the names in the tree if we moved it from a different thread + if doc._c_doc.dict is not c_source_doc.dict: + fixThreadDictNames(c_start_node, c_source_doc.dict, doc._c_doc.dict) + + # 4) fix _Document references + # (and potentially deallocate the source document) + if proxy_count > 0: + if proxy_count == 1 and c_start_node._private is not NULL: + _updateProxyDocument(c_start_node, doc) + else: + fixElementDocument(c_start_node, doc, proxy_count) + return 0 +cdef void fixElementDocument(xmlNode* c_element, _Document doc, + cstd.size_t proxy_count): + tree.BEGIN_FOR_EACH_FROM(c_element, c_element, 1) + if c_element._private is not NULL: + _updateProxyDocument(c_element, doc) + proxy_count -= 1 + if proxy_count == 0: + return + tree.END_FOR_EACH_FROM(c_element) + cdef void fixThreadDictNames(xmlNode* c_element, tree.xmlDict* c_src_dict, tree.xmlDict* c_dict) nogil: From scoder at codespeak.net Sun Sep 7 22:23:27 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sun, 7 Sep 2008 22:23:27 +0200 (CEST) Subject: [Lxml-checkins] r57956 - in lxml/trunk: . benchmark src/lxml Message-ID: <20080907202327.B585316A1FD@codespeak.net> Author: scoder Date: Sun Sep 7 22:23:26 2008 New Revision: 57956 Modified: lxml/trunk/ (props changed) lxml/trunk/benchmark/bench_etree.py lxml/trunk/src/lxml/apihelpers.pxi lxml/trunk/src/lxml/python.pxd Log: r4788 at delle: sbehnel | 2008-09-07 19:55:29 +0200 streamlined the _utf8() function (a very common helper function) Modified: lxml/trunk/benchmark/bench_etree.py ============================================================================== --- lxml/trunk/benchmark/bench_etree.py (original) +++ lxml/trunk/benchmark/bench_etree.py Sun Sep 7 22:23:26 2008 @@ -228,6 +228,7 @@ @with_attributes(True) @children + @nochange def bench_get_attributes(self, children): for child in children: child.get('bla1') Modified: lxml/trunk/src/lxml/apihelpers.pxi ============================================================================== --- lxml/trunk/src/lxml/apihelpers.pxi (original) +++ lxml/trunk/src/lxml/apihelpers.pxi Sun Sep 7 22:23:26 2008 @@ -1130,13 +1130,17 @@ is_non_ascii = 0 while s < c_end: c = s[0] - if c & 0x80: - is_non_ascii = 1 - elif c == c'\0': - return -1 # invalid! - elif is_non_ascii == 0 and not tree.xmlIsChar_ch(c): + if not tree.xmlIsChar_ch(c): return -1 # invalid! + elif c & 0x80: + is_non_ascii = 1 + break s += 1 + if is_non_ascii: + while s < c_end: + if not tree.xmlIsChar_ch(s[0]): + return -1 # invalid! + s += 1 return is_non_ascii cdef object funicode(char* s): @@ -1161,17 +1165,19 @@ return python.PyString_FromStringAndSize(s, slen) cdef object _utf8(object s): - if python.PyString_Check(s): - if isutf8py(s): - raise ValueError, \ - u"All strings must be XML compatible: Unicode or ASCII, no NULL bytes" - elif python.PyUnicode_Check(s): + cdef bint invalid + if python.PyString_CheckExact(s): + invalid = isutf8py(s) + elif python.PyUnicode_CheckExact(s) or python.PyUnicode_Check(s): s = python.PyUnicode_AsUTF8String(s) - if isutf8py(s) == -1: - raise ValueError, \ - u"All strings must be XML compatible: Unicode or ASCII, no NULL bytes" + invalid = isutf8py(s) == -1 + elif python.PyString_Check(s): + invalid = isutf8py(s) else: raise TypeError, u"Argument must be string or unicode." + if invalid: + raise ValueError, \ + u"All strings must be XML compatible: Unicode or ASCII, no NULL bytes" return s cdef bint _isFilePath(char* c_path): Modified: lxml/trunk/src/lxml/python.pxd ============================================================================== --- lxml/trunk/src/lxml/python.pxd (original) +++ lxml/trunk/src/lxml/python.pxd Sun Sep 7 22:23:26 2008 @@ -22,9 +22,10 @@ cdef FILE* PyFile_AsFile(object p) - cdef int PyUnicode_Check(object obj) - cdef int PyString_Check(object obj) - cdef int PyString_CheckExact(object obj) + cdef bint PyUnicode_Check(object obj) + cdef bint PyUnicode_CheckExact(object obj) + cdef bint PyString_Check(object obj) + cdef bint PyString_CheckExact(object obj) cdef object PyUnicode_FromEncodedObject(object s, char* encoding, char* errors) From scoder at codespeak.net Sun Sep 7 22:23:33 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sun, 7 Sep 2008 22:23:33 +0200 (CEST) Subject: [Lxml-checkins] r57957 - in lxml/trunk: . src/lxml Message-ID: <20080907202333.8B00216A213@codespeak.net> Author: scoder Date: Sun Sep 7 22:23:30 2008 New Revision: 57957 Modified: lxml/trunk/ (props changed) lxml/trunk/CHANGES.txt lxml/trunk/src/lxml/apihelpers.pxi Log: r4789 at delle: sbehnel | 2008-09-07 22:18:12 +0200 fix API string input checking, misc. fixes and switch-statement optimisations Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Sun Sep 7 22:23:30 2008 @@ -15,6 +15,9 @@ Bugs fixed ---------- +* 0-bytes could slip through the API when used inside of Unicode + strings. + Other changes ------------- Modified: lxml/trunk/src/lxml/apihelpers.pxi ============================================================================== --- lxml/trunk/src/lxml/apihelpers.pxi (original) +++ lxml/trunk/src/lxml/apihelpers.pxi Sun Sep 7 22:23:30 2008 @@ -1120,7 +1120,11 @@ c = s[0] return 0 -cdef int isutf8py(pystring): +cdef int check_string_utf8(pystring): + u"""Check if a string looks like valid UTF-8 XML content. Returns 0 + for ASCII, 1 for UTF-8 and -1 in the case of errors, such as NULL + bytes or ASCII control characters. + """ cdef char* s cdef char* c_end cdef char c @@ -1130,17 +1134,15 @@ is_non_ascii = 0 while s < c_end: c = s[0] - if not tree.xmlIsChar_ch(c): - return -1 # invalid! - elif c & 0x80: + if c & 0x80: + # skip the entire multi byte sequence + while c & 0x80: + s += 1 + c = s[0] is_non_ascii = 1 - break + elif not tree.xmlIsChar_ch(c): + return -1 # invalid! s += 1 - if is_non_ascii: - while s < c_end: - if not tree.xmlIsChar_ch(s[0]): - return -1 # invalid! - s += 1 return is_non_ascii cdef object funicode(char* s): @@ -1165,14 +1167,14 @@ return python.PyString_FromStringAndSize(s, slen) cdef object _utf8(object s): - cdef bint invalid + cdef int invalid if python.PyString_CheckExact(s): - invalid = isutf8py(s) + invalid = check_string_utf8(s) elif python.PyUnicode_CheckExact(s) or python.PyUnicode_Check(s): s = python.PyUnicode_AsUTF8String(s) - invalid = isutf8py(s) == -1 + invalid = check_string_utf8(s) == -1 elif python.PyString_Check(s): - invalid = isutf8py(s) + invalid = check_string_utf8(s) else: raise TypeError, u"Argument must be string or unicode." if invalid: @@ -1182,6 +1184,7 @@ cdef bint _isFilePath(char* c_path): u"simple heuristic to see if a path is a filename" + cdef char c # test if it looks like an absolute Unix path or a Windows network path if c_path[0] == c'/': return 1 @@ -1192,11 +1195,12 @@ return 1 # test if it looks like a relative path while c_path[0] != c'\0': - if c_path[0] == c':': + c = c_path[0] + if c == c':': return 0 - if c_path[0] == c'/': + elif c == c'/': return 1 - if c_path[0] == c'\\': + elif c == c'\\': return 1 c_path += 1 return 1 @@ -1245,7 +1249,7 @@ if filename is None: return None elif python.PyString_Check(filename): - if not isutf8py(filename): + if not check_string_utf8(filename): # plain ASCII! return filename c_filename = _cstr(filename) @@ -1305,20 +1309,13 @@ return tree.xmlValidateNCName(c_name, 0) == 0 cdef int _htmlNameIsValid(char* c_name): + cdef char c if c_name is NULL or c_name[0] == c'\0': return 0 while c_name[0] != c'\0': - if c_name[0] == c'&' or \ - c_name[0] == c'<' or \ - c_name[0] == c'>' or \ - c_name[0] == c'/' or \ - c_name[0] == c'"' or \ - c_name[0] == c"'" or \ - c_name[0] == c'\x09' or \ - c_name[0] == c'\x0A' or \ - c_name[0] == c'\x0B' or \ - c_name[0] == c'\x0C' or \ - c_name[0] == c'\x20': + c = c_name[0] + if c in (c'&', c'<', c'>', c'/', c'"', c"'", + c'\t', c'\n', c'\x0B', c'\x0C', c'\r', c' '): return 0 c_name = c_name + 1 return 1 From scoder at codespeak.net Sun Sep 7 22:23:38 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sun, 7 Sep 2008 22:23:38 +0200 (CEST) Subject: [Lxml-checkins] r57958 - in lxml/trunk: . src/lxml Message-ID: <20080907202338.06D9B16A214@codespeak.net> Author: scoder Date: Sun Sep 7 22:23:38 2008 New Revision: 57958 Modified: lxml/trunk/ (props changed) lxml/trunk/src/lxml/apihelpers.pxi Log: r4790 at delle: sbehnel | 2008-09-07 22:22:36 +0200 cleanup Modified: lxml/trunk/src/lxml/apihelpers.pxi ============================================================================== --- lxml/trunk/src/lxml/apihelpers.pxi (original) +++ lxml/trunk/src/lxml/apihelpers.pxi Sun Sep 7 22:23:38 2008 @@ -1133,14 +1133,12 @@ c_end = s + python.PyString_GET_SIZE(pystring) is_non_ascii = 0 while s < c_end: - c = s[0] - if c & 0x80: + if s[0] & 0x80: # skip the entire multi byte sequence - while c & 0x80: + while s[0] & 0x80: s += 1 - c = s[0] is_non_ascii = 1 - elif not tree.xmlIsChar_ch(c): + elif not tree.xmlIsChar_ch(s[0]): return -1 # invalid! s += 1 return is_non_ascii From lxml-checkins at codespeak.net Mon Sep 15 05:22:11 2008 From: lxml-checkins at codespeak.net (VIAGRA INC) Date: Mon, 15 Sep 2008 05:22:11 +0200 (CEST) Subject: [Lxml-checkins] SALE 89% OFF Message-ID: <20080915042004.3172.qmail@cpc2-mfld11-0-0-cust369.nott.cable.ntl.com> An HTML attachment was scrubbed... URL: http://codespeak.net/pipermail/lxml-checkins/attachments/20080915/3d81a058/attachment.htm From scoder at codespeak.net Sun Sep 21 16:15:12 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sun, 21 Sep 2008 16:15:12 +0200 (CEST) Subject: [Lxml-checkins] r58301 - in lxml/trunk: . src/lxml/html Message-ID: <20080921141512.AFF81169F28@codespeak.net> Author: scoder Date: Sun Sep 21 16:15:12 2008 New Revision: 58301 Modified: lxml/trunk/ (props changed) lxml/trunk/src/lxml/html/__init__.py Log: r4794 at delle: sbehnel | 2008-09-09 08:48:03 +0200 regexp cleanup Modified: lxml/trunk/src/lxml/html/__init__.py ============================================================================== --- lxml/trunk/src/lxml/html/__init__.py (original) +++ lxml/trunk/src/lxml/html/__init__.py Sun Sep 21 16:15:12 2008 @@ -1370,7 +1370,7 @@ # This isn't a general match, but it's a match for what libxml2 # specifically serialises: __replace_meta_content_type = re.compile( - r'').sub + r']*>').sub def tostring(doc, pretty_print=False, include_meta_content_type=False, encoding=None, method="html"): From scoder at codespeak.net Sun Sep 21 16:15:17 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sun, 21 Sep 2008 16:15:17 +0200 (CEST) Subject: [Lxml-checkins] r58302 - in lxml/trunk: . src/lxml/tests Message-ID: <20080921141517.02BA6169F2A@codespeak.net> Author: scoder Date: Sun Sep 21 16:15:17 2008 New Revision: 58302 Modified: lxml/trunk/ (props changed) lxml/trunk/src/lxml/tests/test_elementtree.py Log: r4795 at delle: sbehnel | 2008-09-09 09:11:50 +0200 fix namespaces in tests Modified: lxml/trunk/src/lxml/tests/test_elementtree.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_elementtree.py (original) +++ lxml/trunk/src/lxml/tests/test_elementtree.py Sun Sep 21 16:15:17 2008 @@ -2650,15 +2650,15 @@ def test_iterparse_attrib_ns(self): iterparse = self.etree.iterparse - f = BytesIO('') + f = BytesIO('') - attr_name = '{testns}bla' + attr_name = '{http://testns/}bla' events = [] iterator = iterparse(f, events=('start','end','start-ns','end-ns')) for event, elem in iterator: events.append(event) if event == 'start': - if elem.tag != '{ns1}a': + if elem.tag != '{http://ns1/}a': elem.set(attr_name, 'value') self.assertEquals( From scoder at codespeak.net Sun Sep 21 16:15:22 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sun, 21 Sep 2008 16:15:22 +0200 (CEST) Subject: [Lxml-checkins] r58303 - in lxml/trunk: . doc Message-ID: <20080921141522.8F689169F2C@codespeak.net> Author: scoder Date: Sun Sep 21 16:15:22 2008 New Revision: 58303 Modified: lxml/trunk/ (props changed) lxml/trunk/doc/parsing.txt Log: r4796 at delle: sbehnel | 2008-09-09 09:13:51 +0200 fix namespaces in tests, some test setup fixes Modified: lxml/trunk/doc/parsing.txt ============================================================================== --- lxml/trunk/doc/parsing.txt (original) +++ lxml/trunk/doc/parsing.txt Sun Sep 21 16:15:22 2008 @@ -189,7 +189,8 @@ >>> parser = etree.HTMLParser() >>> tree = etree.parse(StringIO(broken_html), parser) - >>> result = etree.tostring(tree.getroot(), pretty_print=True) + >>> result = etree.tostring(tree.getroot(), + ... pretty_print=True, method="html") >>> print(result) @@ -206,7 +207,7 @@ .. sourcecode:: pycon >>> html = etree.HTML(broken_html) - >>> result = etree.tostring(html, pretty_print=True) + >>> result = etree.tostring(html, pretty_print=True, method="html") >>> print(result) @@ -426,7 +427,7 @@ ... ... text ... texttail - ... + ... ... ... ''' @@ -435,7 +436,7 @@ ... print("%s: %s" % (action, elem.tag)) end: element end: element - end: {testns}empty-element + end: {http://testns/}empty-element end: root The resulting tree is available through the ``root`` property of the iterator: @@ -458,8 +459,8 @@ end: element start: element end: element - start: {testns}empty-element - end: {testns}empty-element + start: {http://testns/}empty-element + end: {http://testns/}empty-element end: root The 'start-ns' and 'end-ns' events notify about namespace @@ -476,7 +477,7 @@ text texttail - + >>> events = ("start", "end", "start-ns", "end-ns") @@ -493,9 +494,9 @@ end: element start: element end: element - start-ns: ('', 'testns') - start: {testns}empty-element - end: {testns}empty-element + start-ns: ('', 'http://testns/') + start: {http://testns/}empty-element + end: {http://testns/}empty-element end-ns end: root @@ -517,11 +518,11 @@ >>> events = ("start", "end") >>> context = etree.iterparse( - ... StringIO(xml), events=events, tag="{testns}*") + ... StringIO(xml), events=events, tag="{http://testns/}*") >>> for action, elem in context: ... print("%s: %s" % (action, elem.tag)) - start: {testns}empty-element - end: {testns}empty-element + start: {http://testns/}empty-element + end: {http://testns/}empty-element Comments and PIs @@ -540,7 +541,7 @@ ... text ... ... texttail - ... + ... ... ... ''' @@ -561,8 +562,8 @@ comment: - another comment - start: element end: element - start: {testns}empty-element - end: {testns}empty-element + start: {http://testns/}empty-element + end: {http://testns/}empty-element end: root >>> print(context.root.tag) From scoder at codespeak.net Sun Sep 21 16:15:26 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sun, 21 Sep 2008 16:15:26 +0200 (CEST) Subject: [Lxml-checkins] r58304 - lxml/trunk Message-ID: <20080921141526.DF131169F72@codespeak.net> Author: scoder Date: Sun Sep 21 16:15:26 2008 New Revision: 58304 Modified: lxml/trunk/ (props changed) lxml/trunk/IDEAS.txt Log: r4797 at delle: sbehnel | 2008-09-17 15:20:13 +0200 idea: reimplement iterparse() Modified: lxml/trunk/IDEAS.txt ============================================================================== --- lxml/trunk/IDEAS.txt (original) +++ lxml/trunk/IDEAS.txt Sun Sep 21 16:15:26 2008 @@ -22,3 +22,12 @@ access check methods * maybe custom resolvers are enough, or can be combined with this? + +* reimplement iterparse() using the libxml2 xmlReader API + + * Advantage: the implementation can be made safer than the current + SAX implementation, as the parser would not interact with the + Python-level tree. + + * Disadvantage: the tree has to be built manually. In the current + SAX based implementation, libxml2 does it for us. From scoder at codespeak.net Sun Sep 21 16:15:33 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sun, 21 Sep 2008 16:15:33 +0200 (CEST) Subject: [Lxml-checkins] r58305 - in lxml/trunk: . src/lxml src/lxml/tests Message-ID: <20080921141533.3F63A16A0AC@codespeak.net> Author: scoder Date: Sun Sep 21 16:15:32 2008 New Revision: 58305 Modified: lxml/trunk/ (props changed) lxml/trunk/src/lxml/parser.pxi lxml/trunk/src/lxml/tests/test_etree.py Log: r4798 at delle: sbehnel | 2008-09-17 15:21:11 +0200 test case and fix for file(-like) resolver Modified: lxml/trunk/src/lxml/parser.pxi ============================================================================== --- lxml/trunk/src/lxml/parser.pxi (original) +++ lxml/trunk/src/lxml/parser.pxi Sun Sep 21 16:15:32 2008 @@ -441,7 +441,7 @@ c_input = xmlparser.xmlNewInputFromFile( c_context, _cstr(doc_ref._filename)) elif doc_ref._type == PARSER_DATA_FILE: - file_context = _FileReaderContext(doc_ref._file, context, url) + file_context = _FileReaderContext(doc_ref._file, context, url, None) c_input = file_context._createParserInput(c_context) data = file_context else: Modified: lxml/trunk/src/lxml/tests/test_etree.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_etree.py (original) +++ lxml/trunk/src/lxml/tests/test_etree.py Sun Sep 21 16:15:32 2008 @@ -14,7 +14,7 @@ sys.path.insert(0, this_dir) # needed for Py3 from common_imports import etree, StringIO, BytesIO, HelperTestCase, fileInTestDir -from common_imports import LargeFileLikeUnicode, doctest, make_doctest +from common_imports import SillyFileLike, LargeFileLikeUnicode, doctest, make_doctest from common_imports import canonicalize, sorted, _str, _bytes print("") @@ -667,6 +667,27 @@ root = tree.getroot() self.assertEquals(root.text, test_url) + def test_resolve_filelike_dtd(self): + parse = self.etree.parse + parser = self.etree.XMLParser(dtd_validation=True) + assertEqual = self.assertEqual + test_url = _str("__nosuch.dtd") + + class MyResolver(self.etree.Resolver): + def resolve(self, url, id, context): + assertEqual(url, test_url) + return self.resolve_file( + SillyFileLike( + _str(''' + ''') % url), context) + + parser.resolvers.add(MyResolver()) + + xml = _str('&myentity;') % test_url + tree = parse(StringIO(xml), parser) + root = tree.getroot() + self.assertEquals(root.text, test_url) + def test_resolve_filename_dtd(self): parse = self.etree.parse parser = self.etree.XMLParser(attribute_defaults=True) @@ -712,6 +733,28 @@ self.assertEquals( root[0].attrib, {'default': 'valueB'}) + def test_resolve_file_dtd(self): + parse = self.etree.parse + parser = self.etree.XMLParser(attribute_defaults=True) + assertEqual = self.assertEqual + test_url = _str("__nosuch.dtd") + + class MyResolver(self.etree.Resolver): + def resolve(self, url, id, context): + assertEqual(url, test_url) + return self.resolve_file( + open(fileInTestDir('test.dtd'), 'rb'), context) + + parser.resolvers.add(MyResolver()) + + xml = _str('') % test_url + tree = parse(StringIO(xml), parser) + root = tree.getroot() + self.assertEquals( + root.attrib, {'default': 'valueA'}) + self.assertEquals( + root[0].attrib, {'default': 'valueB'}) + def test_resolve_empty(self): parse = self.etree.parse parser = self.etree.XMLParser(load_dtd=True) From scoder at codespeak.net Sun Sep 21 16:15:37 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sun, 21 Sep 2008 16:15:37 +0200 (CEST) Subject: [Lxml-checkins] r58306 - in lxml/trunk: . src/lxml Message-ID: <20080921141537.A6DCB169F2A@codespeak.net> Author: scoder Date: Sun Sep 21 16:15:37 2008 New Revision: 58306 Modified: lxml/trunk/ (props changed) lxml/trunk/CHANGES.txt lxml/trunk/src/lxml/parser.pxi Log: r4799 at delle: sbehnel | 2008-09-17 21:30:57 +0200 fix for parser encoding keyword Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Sun Sep 21 16:15:37 2008 @@ -15,6 +15,8 @@ Bugs fixed ---------- +* Overriding the parser encoding didn't work for many encodings. + * 0-bytes could slip through the API when used inside of Unicode strings. Modified: lxml/trunk/src/lxml/parser.pxi ============================================================================== --- lxml/trunk/src/lxml/parser.pxi (original) +++ lxml/trunk/src/lxml/parser.pxi Sun Sep 21 16:15:37 2008 @@ -683,6 +683,7 @@ def __init__(self, int parse_options, bint for_html, XMLSchema schema, remove_comments, remove_pis, strip_cdata, target, filename, encoding): + cdef tree.xmlCharEncodingHandler* enchandler cdef int c_encoding if not isinstance(self, HTMLParser) and \ not isinstance(self, XMLParser) and \ @@ -705,10 +706,10 @@ self._default_encoding_int = tree.XML_CHAR_ENCODING_NONE else: encoding = _utf8(encoding) - c_encoding = tree.xmlParseCharEncoding(_cstr(encoding)) - if c_encoding == tree.XML_CHAR_ENCODING_ERROR or \ - c_encoding == tree.XML_CHAR_ENCODING_NONE: + enchandler = tree.xmlFindCharEncodingHandler(_cstr(encoding)) + if enchandler is NULL: raise LookupError, u"unknown encoding: '%s'" % encoding + tree.xmlCharEncCloseFunc(enchandler) self._default_encoding = encoding self._default_encoding_int = c_encoding From scoder at codespeak.net Sun Sep 21 16:15:42 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sun, 21 Sep 2008 16:15:42 +0200 (CEST) Subject: [Lxml-checkins] r58307 - in lxml/trunk: . src/lxml Message-ID: <20080921141542.0140A169F72@codespeak.net> Author: scoder Date: Sun Sep 21 16:15:42 2008 New Revision: 58307 Modified: lxml/trunk/ (props changed) lxml/trunk/src/lxml/iterparse.pxi lxml/trunk/src/lxml/parser.pxi Log: r4800 at delle: sbehnel | 2008-09-17 22:13:58 +0200 fix encoding override also for the feed parser Modified: lxml/trunk/src/lxml/iterparse.pxi ============================================================================== --- lxml/trunk/src/lxml/iterparse.pxi (original) +++ lxml/trunk/src/lxml/iterparse.pxi Sun Sep 21 16:15:42 2008 @@ -415,6 +415,19 @@ context = <_IterparseContext>self._getPushParserContext() __GLOBAL_PARSER_CONTEXT.initParserDict(context._c_ctxt) + + if self._default_encoding is not None: + if self._for_html: + error = _htmlCtxtResetPush( + context._c_ctxt, NULL, 0, + _cstr(self._default_encoding), self._parse_options) + else: + xmlparser.xmlCtxtUseOptions( + context._c_ctxt, self._parse_options) + error = xmlparser.xmlCtxtResetPush( + context._c_ctxt, NULL, 0, NULL, + _cstr(self._default_encoding)) + context.prepare() # parser will not be unlocked - no other methods supported Modified: lxml/trunk/src/lxml/parser.pxi ============================================================================== --- lxml/trunk/src/lxml/parser.pxi (original) +++ lxml/trunk/src/lxml/parser.pxi Sun Sep 21 16:15:42 2008 @@ -678,7 +678,6 @@ cdef object _filename cdef object _target cdef object _default_encoding - cdef int _default_encoding_int def __init__(self, int parse_options, bint for_html, XMLSchema schema, remove_comments, remove_pis, strip_cdata, target, @@ -703,7 +702,6 @@ if encoding is None: self._default_encoding = None - self._default_encoding_int = tree.XML_CHAR_ENCODING_NONE else: encoding = _utf8(encoding) enchandler = tree.xmlFindCharEncodingHandler(_cstr(encoding)) @@ -711,7 +709,6 @@ raise LookupError, u"unknown encoding: '%s'" % encoding tree.xmlCharEncCloseFunc(enchandler) self._default_encoding = encoding - self._default_encoding_int = c_encoding cdef _ParserContext _getParserContext(self): cdef xmlparser.xmlParserCtxt* pctxt @@ -777,7 +774,7 @@ c_filename = NULL if self._for_html: c_ctxt = htmlparser.htmlCreatePushParserCtxt( - NULL, NULL, NULL, 0, c_filename, self._default_encoding_int) + NULL, NULL, NULL, 0, c_filename, tree.XML_CHAR_ENCODING_NONE) if c_ctxt is not NULL: htmlparser.htmlCtxtUseOptions(c_ctxt, self._parse_options) else: @@ -785,9 +782,6 @@ NULL, NULL, NULL, 0, c_filename) if c_ctxt is not NULL: xmlparser.xmlCtxtUseOptions(c_ctxt, self._parse_options) - if self._default_encoding_int != tree.XML_CHAR_ENCODING_NONE: - xmlparser.xmlSwitchEncoding( - c_ctxt, self._default_encoding_int) return c_ctxt property error_log: