From scoder at codespeak.net Tue Oct 13 09:15:03 2009 From: scoder at codespeak.net (scoder at codespeak.net) Date: Tue, 13 Oct 2009 09:15:03 +0200 (CEST) Subject: [Lxml-checkins] r68360 - in lxml/trunk: . src/lxml/tests Message-ID: <20091013071503.F09A0168053@codespeak.net> Author: scoder Date: Tue Oct 13 09:15:02 2009 New Revision: 68360 Modified: lxml/trunk/ (props changed) lxml/trunk/src/lxml/tests/common_imports.py lxml/trunk/src/lxml/tests/test_elementtree.py Log: r5272 at delle: sbehnel | 2009-10-01 12:25:59 +0200 enable compatible comparison tests for ET 1.2.x Modified: lxml/trunk/src/lxml/tests/common_imports.py ============================================================================== --- lxml/trunk/src/lxml/tests/common_imports.py (original) +++ lxml/trunk/src/lxml/tests/common_imports.py Tue Oct 13 09:15:02 2009 @@ -22,9 +22,9 @@ ElementTree = None if hasattr(ElementTree, 'VERSION'): - if make_version_tuple(ElementTree.VERSION)[:2] < (1,3): - # compatibility tests require ET 1.3+ - ElementTree = None + ET_VERSION = make_version_tuple(ElementTree.VERSION) +else: + ET_VERSION = (0,0,0) try: import cElementTree # standard ET @@ -35,9 +35,18 @@ cElementTree = None if hasattr(cElementTree, 'VERSION'): - if make_version_tuple(cElementTree.VERSION)[:2] <= (1,0): - # compatibility tests do not run with cET 1.0.7 - cElementTree = None + CET_VERSION = make_version_tuple(cElementTree.VERSION) +else: + CET_VERSION = (0,0,0) + +def filter_by_version(test_class, version_dict, current_version): + """Remove test methods that do not work with the current lib version. 
+ """ + find_required_version = version_dict.get + for name in dir(test_class): + expected_version = find_required_version(name, (0,0,0)) + if expected_version > current_version: + setattr(test_class, name, None) try: import doctest Modified: lxml/trunk/src/lxml/tests/test_elementtree.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_elementtree.py (original) +++ lxml/trunk/src/lxml/tests/test_elementtree.py Tue Oct 13 09:15:02 2009 @@ -15,14 +15,19 @@ if this_dir not in sys.path: sys.path.insert(0, this_dir) # needed for Py3 -from common_imports import StringIO, BytesIO, etree, ElementTree, cElementTree -from common_imports import fileInTestDir, canonicalize, HelperTestCase +from common_imports import StringIO, BytesIO, etree +from common_imports import ElementTree, cElementTree, ET_VERSION, CET_VERSION +from common_imports import filter_by_version, fileInTestDir, canonicalize, HelperTestCase from common_imports import _str, _bytes +if cElementTree is not None and CET_VERSION <= (1,0,7): + cElementTree = None + +if ElementTree is not None: + print("Comparing with ElementTree %s" % getattr(ElementTree, "VERSION", "?")) + if cElementTree is not None: - if tuple([int(n) for n in - getattr(cElementTree, "VERSION", "0.0").split(".")]) <= (1,0,7): - cElementTree = None + print("Comparing with cElementTree %s" % getattr(cElementTree, "VERSION", "?")) try: reversed @@ -34,6 +39,8 @@ class ETreeTestCaseBase(HelperTestCase): etree = None + required_versions_ET = {} + required_versions_cET = {} def test_element(self): for i in range(10): @@ -658,6 +665,7 @@ self.assertEquals(0, len(root)) self.assertEquals('This is a text.', root.text) + required_versions_ET['test_fromstringlist'] = (1,3) def test_fromstringlist(self): fromstringlist = self.etree.fromstringlist @@ -666,6 +674,7 @@ self.assertEquals(0, len(root)) self.assertEquals('This is a text.', root.text) + 
required_versions_ET['test_fromstringlist_characters'] = (1,3) def test_fromstringlist_characters(self): fromstringlist = self.etree.fromstringlist @@ -673,6 +682,7 @@ self.assertEquals(0, len(root)) self.assertEquals('This is a text.', root.text) + required_versions_ET['test_fromstringlist_single'] = (1,3) def test_fromstringlist_single(self): fromstringlist = self.etree.fromstringlist @@ -780,6 +790,7 @@ result.append(el1.tag) self.assertEquals(['one','one', 'two', 'two', 'one', 'two'], result) + required_versions_ET['test_itertext'] = (1,3) def test_itertext(self): # ET 1.3+ XML = self.etree.XML @@ -789,6 +800,7 @@ self.assertEquals(["RTEXT", "ATAIL", "CTEXT", "CTAIL"], text) + required_versions_ET['test_itertext_child'] = (1,3) def test_itertext_child(self): # ET 1.3+ XML = self.etree.XML @@ -868,6 +880,7 @@ _bytes('This is a test.' % (i, i)), canonicalize(data)) + required_versions_ET['test_write_method_html'] = (1,3) def test_write_method_html(self): ElementTree = self.etree.ElementTree Element = self.etree.Element @@ -887,6 +900,7 @@ self.assertEquals(_bytes('

html
test

'), data) + required_versions_ET['test_write_method_text'] = (1,3) def test_write_method_text(self): ElementTree = self.etree.ElementTree Element = self.etree.Element @@ -1062,6 +1076,7 @@ a.tail) self.assertXML(_bytes(''), a) + required_versions_ET['test_extend'] = (1,3) def test_extend(self): root = self.etree.Element('foo') for i in range(3): @@ -1099,6 +1114,7 @@ self.assertEquals(a[0].text, 'foo') # ElementTree < 1.3 adds whitespace around comments + required_versions_ET['test_comment_text'] = (1,3) def test_comment_text(self): Element = self.etree.Element SubElement = self.etree.SubElement @@ -1119,8 +1135,9 @@ self.assertEquals( _bytes(''), tostring(a)) - + # ElementTree < 1.3 adds whitespace around comments + required_versions_ET['test_comment_whitespace'] = (1,3) def test_comment_whitespace(self): Element = self.etree.Element SubElement = self.etree.SubElement @@ -1654,6 +1671,7 @@ _bytes(''), b) + required_versions_ET['test_iter'] = (1,3) def test_iter(self): Element = self.etree.Element SubElement = self.etree.SubElement @@ -2607,6 +2625,7 @@ self.assert_(tostring(b) == _bytes('Foo') or tostring(b) == _bytes('Foo')) + required_versions_ET['test_tostring_method_html'] = (1,3) def test_tostring_method_html(self): tostring = self.etree.tostring Element = self.etree.Element @@ -2621,6 +2640,7 @@ self.assertEquals(_bytes('

html
test

'), tostring(html, method="html")) + required_versions_ET['test_tostring_method_text'] = (1,3) def test_tostring_method_text(self): tostring = self.etree.tostring Element = self.etree.Element @@ -2784,6 +2804,7 @@ parse = self.etree.parse self.assertRaises(TypeError, parse, None) + required_versions_ET['test_parse_error'] = (1,3) def test_parse_error(self): # ET < 1.3 raises ExpatError parse = self.etree.parse @@ -2791,6 +2812,7 @@ self.assertRaises(SyntaxError, parse, f) f.close() + required_versions_ET['test_parse_error_from_file'] = (1,3) def test_parse_error_from_file(self): parse = self.etree.parse # from file @@ -2890,6 +2912,7 @@ self.assertEquals(_str('S?k p? nettet').encode('iso-8859-1'), result) + required_versions_ET['test_parse_encoding_8bit_explicit'] = (1,3) def test_parse_encoding_8bit_explicit(self): XMLParser = self.etree.XMLParser @@ -2905,6 +2928,7 @@ a = tree.getroot() self.assertEquals(a.text, text) + required_versions_ET['test_parse_encoding_8bit_override'] = (1,3) def test_parse_encoding_8bit_override(self): XMLParser = self.etree.XMLParser @@ -3302,11 +3326,28 @@ self.assertEquals(root[0].tag, "a") self.assertEquals(root[0].get("test"), "works") + def test_feed_parser_unicode(self): + parser = self.etree.XMLParser() + + parser.feed(_str('<')) + parser.feed(_str('a test="works"/')) + parser.feed(_str('>')) + + root = parser.close() + + self.assertEquals(root.tag, "root") + self.assertEquals(root[0].tag, "a") + self.assertEquals(root[0].get("test"), "works") + + required_versions_ET['test_feed_parser_error_close_empty'] = (1,3) def test_feed_parser_error_close_empty(self): ParseError = self.etree.ParseError parser = self.etree.XMLParser() self.assertRaises(ParseError, parser.close) + required_versions_ET['test_feed_parser_error_close_incomplete'] = (1,3) def test_feed_parser_error_close_incomplete(self): ParseError = self.etree.ParseError parser = self.etree.XMLParser() @@ -3316,6 +3357,7 @@ self.assertRaises(ParseError, parser.close) + 
required_versions_ET['test_feed_parser_error_broken'] = (1,3) def test_feed_parser_error_broken(self): ParseError = self.etree.ParseError parser = self.etree.XMLParser() @@ -3330,6 +3372,7 @@ self.assertRaises(ParseError, parser.close) + required_versions_ET['test_feed_parser_error_position'] = (1,3) def test_feed_parser_error_position(self): ParseError = self.etree.ParseError parser = self.etree.XMLParser() @@ -3344,6 +3387,7 @@ # parser target interface + required_versions_ET['test_parser_target_property'] = (1,3) def test_parser_target_property(self): class Target(object): pass @@ -3488,6 +3532,7 @@ "end-sub", "start-sub", "end-sub", "end-root"], events) + required_versions_ET['test_parser_target_entity_unknown'] = (1,3) def test_parser_target_entity_unknown(self): events = [] class Target(object): @@ -3645,10 +3690,18 @@ class ElementTreeTestCase(ETreeTestCaseBase): etree = ElementTree + filter_by_version( + ElementTreeTestCase, + ElementTreeTestCase.required_versions_ET, ET_VERSION) + if cElementTree: class CElementTreeTestCase(ETreeTestCaseBase): etree = cElementTree + filter_by_version( + CElementTreeTestCase, + CElementTreeTestCase.required_versions_cET, CET_VERSION) + def test_suite(): suite = unittest.TestSuite() if etree: From scoder at codespeak.net Tue Oct 13 09:15:10 2009 From: scoder at codespeak.net (scoder at codespeak.net) Date: Tue, 13 Oct 2009 09:15:10 +0200 (CEST) Subject: [Lxml-checkins] r68361 - in lxml/trunk: . 
src/lxml Message-ID: <20091013071510.30A5B168058@codespeak.net> Author: scoder Date: Tue Oct 13 09:15:07 2009 New Revision: 68361 Modified: lxml/trunk/ (props changed) lxml/trunk/src/lxml/extensions.pxi Log: r5273 at delle: sbehnel | 2009-10-12 21:46:24 +0200 code simplification Modified: lxml/trunk/src/lxml/extensions.pxi ============================================================================== --- lxml/trunk/src/lxml/extensions.pxi (original) +++ lxml/trunk/src/lxml/extensions.pxi Tue Oct 13 09:15:07 2009 @@ -677,12 +677,12 @@ xpath.xmlXPathParserContext* ctxt, int nargs): cdef _Document doc cdef xpath.xmlXPathObject* obj - cdef int i cdef list args + cdef int i doc = context._doc try: args = [] - for i from 0 <= i < nargs: + for i in xrange(nargs): obj = xpath.valuePop(ctxt) o = _unwrapXPathObject(obj, doc, context._build_smart_strings) _freeXPathObject(obj) From scoder at codespeak.net Tue Oct 13 09:15:13 2009 From: scoder at codespeak.net (scoder at codespeak.net) Date: Tue, 13 Oct 2009 09:15:13 +0200 (CEST) Subject: [Lxml-checkins] r68362 - in lxml/trunk: . src/lxml/html src/lxml/html/tests Message-ID: <20091013071513.E2976168058@codespeak.net> Author: scoder Date: Tue Oct 13 09:15:13 2009 New Revision: 68362 Modified: lxml/trunk/ (props changed) lxml/trunk/CHANGES.txt lxml/trunk/src/lxml/html/__init__.py lxml/trunk/src/lxml/html/tests/test_forms.txt Log: r5274 at delle: sbehnel | 2009-10-13 08:48:17 +0200 let textarea.value represent the complete tag content Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Tue Oct 13 09:15:13 2009 @@ -22,6 +22,11 @@ Bugs fixed ---------- +* The ``.value`` attribute of ``textarea`` elements in lxml.html did + not represent the complete raw value (including child tags etc.). It + now serialises the complete content on read and replaces the + complete content by a string on write. 
+ * Looking up and deleting attributes without a namespace could hit a namespaced attribute of the same name instead. Modified: lxml/trunk/src/lxml/html/__init__.py ============================================================================== --- lxml/trunk/src/lxml/html/__init__.py (original) +++ lxml/trunk/src/lxml/html/__init__.py Tue Oct 13 09:15:13 2009 @@ -965,11 +965,21 @@ """ Get/set the value (which is the contents of this element) """ - return self.text or '' + content = self.text or '' + if self.tag.startswith("{%s}" % XHTML_NAMESPACE): + serialisation_method = 'xml' + else: + serialisation_method = 'html' + for el in self: + # it's rare that we actually get here, so let's not use ''.join() + content += etree.tostring(el, method=serialisation_method) + return content def _value__set(self, value): + del self[:] self.text = value def _value__del(self): self.text = '' + del self[:] value = property(_value__get, _value__set, _value__del, doc=_value__get.__doc__) HtmlElementClassLookup._default_element_classes['textarea'] = TextareaElement Modified: lxml/trunk/src/lxml/html/tests/test_forms.txt ============================================================================== --- lxml/trunk/src/lxml/html/tests/test_forms.txt (original) +++ lxml/trunk/src/lxml/html/tests/test_forms.txt Tue Oct 13 09:15:13 2009 @@ -166,3 +166,21 @@ [('foo', 'bar')] >>> tree.forms[0].fields.values() ['bar'] + +>>> tree = lxml.html.fromstring(''' +... +...
+... +...
+... +... ''') +>>> tree.forms[0].fields.keys() +['foo'] +>>> ta = tree.forms[0].inputs['foo'] +>>> print(ta.value) +some text
content
with tags +>>> ta.value = 'abc
def' +>>> print(ta.value) +abc
def +>>> len(ta) +0 From scoder at codespeak.net Fri Oct 16 23:38:48 2009 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 16 Oct 2009 23:38:48 +0200 (CEST) Subject: [Lxml-checkins] r68550 - in lxml/trunk: . src/lxml Message-ID: <20091016213848.B3629168067@codespeak.net> Author: scoder Date: Fri Oct 16 23:38:48 2009 New Revision: 68550 Modified: lxml/trunk/ (props changed) lxml/trunk/src/lxml/lxml.etree.pyx Log: r5279 at delle: sbehnel | 2009-10-16 23:30:29 +0200 tiny code fix Modified: lxml/trunk/src/lxml/lxml.etree.pyx ============================================================================== --- lxml/trunk/src/lxml/lxml.etree.pyx (original) +++ lxml/trunk/src/lxml/lxml.etree.pyx Fri Oct 16 23:38:48 2009 @@ -1606,11 +1606,11 @@ _copyNonElementSiblings(self._context_node._c_node, root._c_node) doc = root._doc c_doc = self._context_node._doc._c_doc - if c_doc.intSubset and not doc._c_doc.intSubset: + if c_doc.intSubset is not NULL and doc._c_doc.intSubset is NULL: doc._c_doc.intSubset = tree.xmlCopyDtd(c_doc.intSubset) if doc._c_doc.intSubset is NULL: python.PyErr_NoMemory() - if c_doc.extSubset and not doc._c_doc.extSubset: + if c_doc.extSubset is not NULL and not doc._c_doc.extSubset is NULL: doc._c_doc.extSubset = tree.xmlCopyDtd(c_doc.extSubset) if doc._c_doc.extSubset is NULL: python.PyErr_NoMemory() From scoder at codespeak.net Fri Oct 16 23:38:54 2009 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 16 Oct 2009 23:38:54 +0200 (CEST) Subject: [Lxml-checkins] r68551 - in lxml/trunk: . 
src/lxml Message-ID: <20091016213854.58F88168076@codespeak.net> Author: scoder Date: Fri Oct 16 23:38:52 2009 New Revision: 68551 Modified: lxml/trunk/ (props changed) lxml/trunk/CHANGES.txt lxml/trunk/src/lxml/parser.pxi Log: r5280 at delle: sbehnel | 2009-10-16 23:38:38 +0200 fix bug 410916: entity non-resolution in feed parser Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Fri Oct 16 23:38:52 2009 @@ -22,6 +22,9 @@ Bugs fixed ---------- +* The ``resolve_entities`` option did not work in the incremental feed + parser. + * The ``.value`` attribute of ``textarea`` elements in lxml.html did not represent the complete raw value (including child tags etc.). It now serialises the complete content on read and replaces the Modified: lxml/trunk/src/lxml/parser.pxi ============================================================================== --- lxml/trunk/src/lxml/parser.pxi (original) +++ lxml/trunk/src/lxml/parser.pxi Fri Oct 16 23:38:52 2009 @@ -1094,6 +1094,15 @@ py_buffer_len -= buffer_len c_data += buffer_len + if error and not pctxt.replaceEntities and not pctxt.validate: + # in this mode, we ignore errors about undefined entities + for entry in context._error_log.filter_from_errors(): + if entry.type != ErrorTypes.WAR_UNDECLARED_ENTITY and \ + entry.type != ErrorTypes.ERR_UNDECLARED_ENTITY: + break + else: + error = 0 + if not recover and (error or not pctxt.wellFormed): self._feed_parser_running = 0 try: From scoder at codespeak.net Fri Oct 16 23:43:18 2009 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 16 Oct 2009 23:43:18 +0200 (CEST) Subject: [Lxml-checkins] r68552 - in lxml/branch/lxml-2.2: . 
src/lxml Message-ID: <20091016214318.DBAA4168067@codespeak.net> Author: scoder Date: Fri Oct 16 23:43:17 2009 New Revision: 68552 Modified: lxml/branch/lxml-2.2/ (props changed) lxml/branch/lxml-2.2/CHANGES.txt lxml/branch/lxml-2.2/src/lxml/parser.pxi Log: trunk merge: fix for bug 410916 - unresolved entities in feed parser Modified: lxml/branch/lxml-2.2/CHANGES.txt ============================================================================== --- lxml/branch/lxml-2.2/CHANGES.txt (original) +++ lxml/branch/lxml-2.2/CHANGES.txt Fri Oct 16 23:43:17 2009 @@ -2,7 +2,7 @@ lxml changelog ============== -2.2.2 (2009-10-??) +2.2.3 (2009-10-??) ================== Features added @@ -11,6 +11,9 @@ Bugs fixed ---------- +* The ``resolve_entities`` option did not work in the incremental feed + parser. + * Looking up and deleting attributes without a namespace could hit a namespaced attribute of the same name instead. Modified: lxml/branch/lxml-2.2/src/lxml/parser.pxi ============================================================================== --- lxml/branch/lxml-2.2/src/lxml/parser.pxi (original) +++ lxml/branch/lxml-2.2/src/lxml/parser.pxi Fri Oct 16 23:43:17 2009 @@ -1094,6 +1094,15 @@ py_buffer_len -= buffer_len c_data += buffer_len + if error and not pctxt.replaceEntities and not pctxt.validate: + # in this mode, we ignore errors about undefined entities + for entry in context._error_log.filter_from_errors(): + if entry.type != ErrorTypes.WAR_UNDECLARED_ENTITY and \ + entry.type != ErrorTypes.ERR_UNDECLARED_ENTITY: + break + else: + error = 0 + if not recover and (error or not pctxt.wellFormed): self._feed_parser_running = 0 try: From scoder at codespeak.net Sat Oct 17 01:18:39 2009 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sat, 17 Oct 2009 01:18:39 +0200 (CEST) Subject: [Lxml-checkins] r68557 - in lxml/trunk: . 
src/lxml Message-ID: <20091016231839.9A7B0168041@codespeak.net> Author: scoder Date: Sat Oct 17 01:18:39 2009 New Revision: 68557 Modified: lxml/trunk/ (props changed) lxml/trunk/src/lxml/apihelpers.pxi lxml/trunk/src/lxml/docloader.pxi lxml/trunk/src/lxml/iterparse.pxi lxml/trunk/src/lxml/lxml.objectify.pyx lxml/trunk/src/lxml/objectpath.pxi lxml/trunk/src/lxml/parser.pxi lxml/trunk/src/lxml/python.pxd Log: r5283 at delle: sbehnel | 2009-10-17 01:16:46 +0200 fix PyBytes/PyString usage Modified: lxml/trunk/src/lxml/apihelpers.pxi ============================================================================== --- lxml/trunk/src/lxml/apihelpers.pxi (original) +++ lxml/trunk/src/lxml/apihelpers.pxi Sat Oct 17 01:18:39 2009 @@ -1233,12 +1233,12 @@ cdef object _utf8(object s): cdef int invalid - if python.PyString_CheckExact(s): + if python.PyBytes_CheckExact(s): invalid = check_string_utf8(s) elif python.PyUnicode_CheckExact(s) or python.PyUnicode_Check(s): s = python.PyUnicode_AsUTF8String(s) invalid = check_string_utf8(s) == -1 - elif python.PyString_Check(s): + elif python.PyBytes_Check(s): invalid = check_string_utf8(s) else: raise TypeError, u"Argument must be string or unicode." @@ -1275,7 +1275,7 @@ """ if filename is None: return None - elif python.PyString_Check(filename): + elif python.PyBytes_Check(filename): return filename elif python.PyUnicode_Check(filename): filename8 = python.PyUnicode_AsEncodedString( @@ -1313,7 +1313,7 @@ cdef char* c_filename if filename is None: return None - elif python.PyString_Check(filename): + elif python.PyBytes_Check(filename): if not check_string_utf8(filename): # plain ASCII! 
return filename Modified: lxml/trunk/src/lxml/docloader.pxi ============================================================================== --- lxml/trunk/src/lxml/docloader.pxi (original) +++ lxml/trunk/src/lxml/docloader.pxi Sat Oct 17 01:18:39 2009 @@ -49,7 +49,7 @@ cdef _InputDocument doc_ref if python.PyUnicode_Check(string): string = python.PyUnicode_AsUTF8String(string) - elif not python.PyString_Check(string): + elif not python.PyBytes_Check(string): raise TypeError, "argument must be a byte string or unicode string" doc_ref = _InputDocument() doc_ref._type = PARSER_DATA_STRING Modified: lxml/trunk/src/lxml/iterparse.pxi ============================================================================== --- lxml/trunk/src/lxml/iterparse.pxi (original) +++ lxml/trunk/src/lxml/iterparse.pxi Sat Oct 17 01:18:39 2009 @@ -479,7 +479,7 @@ while python.PyList_GET_SIZE(context._events) == 0: if c_stream is NULL: data = self._source.read(__ITERPARSE_CHUNK_SIZE) - if not python.PyString_Check(data): + if not python.PyBytes_Check(data): self._source = None raise TypeError, u"reading file objects must return plain strings" c_data_len = python.PyString_GET_SIZE(data) Modified: lxml/trunk/src/lxml/lxml.objectify.pyx ============================================================================== --- lxml/trunk/src/lxml/lxml.objectify.pyx (original) +++ lxml/trunk/src/lxml/lxml.objectify.pyx Sat Oct 17 01:18:39 2009 @@ -949,7 +949,7 @@ cdef object _type cdef list _schema_types def __init__(self, name, type_check, type_class, stringify=None): - if python.PyString_Check(name): + if python.PyBytes_Check(name): name = python.PyUnicode_FromEncodedObject(name, 'ASCII', NULL) elif not python.PyUnicode_Check(name): raise TypeError, u"Type name must be a string" @@ -1579,12 +1579,12 @@ doc = element._doc if empty_type_name is not None: - if python.PyString_Check(empty_type_name): + if python.PyBytes_Check(empty_type_name): empty_type_name = python.PyUnicode_FromEncodedObject( 
empty_type_name, "ASCII", NULL) dict_result = python.PyDict_GetItem(_SCHEMA_TYPE_DICT, empty_type_name) elif empty_pytype_name is not None: - if python.PyString_Check(empty_pytype_name): + if python.PyBytes_Check(empty_pytype_name): empty_pytype_name = python.PyUnicode_FromEncodedObject( empty_pytype_name, "ASCII", NULL) dict_result = python.PyDict_GetItem(_PYTYPE_DICT, empty_pytype_name) Modified: lxml/trunk/src/lxml/objectpath.pxi ============================================================================== --- lxml/trunk/src/lxml/objectpath.pxi (original) +++ lxml/trunk/src/lxml/objectpath.pxi Sat Oct 17 01:18:39 2009 @@ -93,7 +93,7 @@ """ cdef bint has_dot cdef list new_path = [] - if python.PyString_Check(path): + if python.PyBytes_Check(path): path = python.PyUnicode_FromEncodedObject(path, 'ASCII', NULL) path = path.strip() if path == u'.': Modified: lxml/trunk/src/lxml/parser.pxi ============================================================================== --- lxml/trunk/src/lxml/parser.pxi (original) +++ lxml/trunk/src/lxml/parser.pxi Sat Oct 17 01:18:39 2009 @@ -357,7 +357,7 @@ c_requested -= remaining self._bytes = self._filelike.read(c_requested) - if not python.PyString_Check(self._bytes): + if not python.PyBytes_Check(self._bytes): if python.PyUnicode_Check(self._bytes): if self._encoding is None: self._bytes = python.PyUnicode_AsUTF8String(self._bytes) @@ -1043,7 +1043,7 @@ cdef int buffer_len cdef int error cdef bint recover = self._parse_options & xmlparser.XML_PARSE_RECOVER - if python.PyString_Check(data): + if python.PyBytes_Check(data): if self._default_encoding is None: c_encoding = NULL else: @@ -1538,7 +1538,7 @@ # pass native unicode only if libxml2 can handle it if _UNICODE_ENCODING is NULL: text = python.PyUnicode_AsUTF8String(text) - elif not python.PyString_Check(text): + elif not python.PyBytes_Check(text): raise ValueError, u"can only parse strings" if python.PyUnicode_Check(url): url = python.PyUnicode_AsUTF8String(url) Modified: 
lxml/trunk/src/lxml/python.pxd ============================================================================== --- lxml/trunk/src/lxml/python.pxd (original) +++ lxml/trunk/src/lxml/python.pxd Sat Oct 17 01:18:39 2009 @@ -23,8 +23,8 @@ cdef bint PyUnicode_Check(object obj) cdef bint PyUnicode_CheckExact(object obj) - cdef bint PyString_Check(object obj) - cdef bint PyString_CheckExact(object obj) + cdef bint PyBytes_Check(object obj) + cdef bint PyBytes_CheckExact(object obj) cdef object PyUnicode_FromEncodedObject(object s, char* encoding, char* errors) From scoder at codespeak.net Sat Oct 17 01:18:43 2009 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sat, 17 Oct 2009 01:18:43 +0200 (CEST) Subject: [Lxml-checkins] r68558 - in lxml/trunk: . src/lxml src/lxml/tests Message-ID: <20091016231843.C7827168058@codespeak.net> Author: scoder Date: Sat Oct 17 01:18:43 2009 New Revision: 68558 Modified: lxml/trunk/ (props changed) lxml/trunk/src/lxml/extensions.pxi lxml/trunk/src/lxml/tests/test_xpathevaluator.py lxml/trunk/src/lxml/xpath.pxi Log: r5284 at delle: sbehnel | 2009-10-17 01:18:28 +0200 ticket 446654: support strings in node-sets returned from extension functions Modified: lxml/trunk/src/lxml/extensions.pxi ============================================================================== --- lxml/trunk/src/lxml/extensions.pxi (original) +++ lxml/trunk/src/lxml/extensions.pxi Sat Oct 17 01:18:43 2009 @@ -457,12 +457,15 @@ ################################################################################ # helper functions -cdef xpath.xmlXPathObject* _wrapXPathObject(object obj) except NULL: +cdef xpath.xmlXPathObject* _wrapXPathObject(object obj, _Document doc, + _BaseContext context) except NULL: cdef xpath.xmlNodeSet* resultSet - cdef _Element node + cdef _Element fake_node = None + cdef xmlNode* c_node + if python.PyUnicode_Check(obj): obj = _utf8(obj) - if python.PyString_Check(obj): + if python.PyBytes_Check(obj): return 
xpath.xmlXPathNewCString(_cstr(obj)) if python.PyBool_Check(obj): return xpath.xmlXPathNewBoolean(obj) @@ -474,13 +477,42 @@ resultSet = xpath.xmlXPathNodeSetCreate((<_Element>obj)._c_node) elif python.PySequence_Check(obj): resultSet = xpath.xmlXPathNodeSetCreate(NULL) - for element in obj: - if isinstance(element, _Element): - node = <_Element>element - xpath.xmlXPathNodeSetAdd(resultSet, node._c_node) - else: - xpath.xmlXPathFreeNodeSet(resultSet) - raise XPathResultError, u"This is not a node: %r" % element + try: + for value in obj: + if isinstance(value, _Element): + if context is not None: + context._hold(value) + xpath.xmlXPathNodeSetAdd(resultSet, (<_Element>value)._c_node) + else: + if context is None or doc is None: + raise XPathResultError, \ + u"Non-Element values not supported at this point - got %r" % value + # support strings by appending text nodes to an Element + if python.PyUnicode_Check(value): + value = _utf8(value) + if python.PyBytes_Check(value): + if fake_node is None: + fake_node = _makeElement("text-root", NULL, doc, None, + None, None, None, None, None) + context._hold(fake_node) + else: + # append a comment node to keep the text nodes separate + c_node = tree.xmlNewDocComment(doc._c_doc, "") + if c_node is NULL: + python.PyErr_NoMemory() + tree.xmlAddChild(fake_node._c_node, c_node) + context._hold(value) + c_node = tree.xmlNewDocText(doc._c_doc, _cstr(value)) + if c_node is NULL: + python.PyErr_NoMemory() + tree.xmlAddChild(fake_node._c_node, c_node) + xpath.xmlXPathNodeSetAdd(resultSet, c_node) + else: + raise XPathResultError, \ + u"This is not a supported node-set result: %r" % value + except: + xpath.xmlXPathFreeNodeSet(resultSet) + raise else: raise XPathResultError, u"Unknown return type: %s" % \ python._fqtypename(obj) @@ -613,7 +645,7 @@ else: is_text = not (is_tail or is_attribute) - if python.PyString_CheckExact(string_value): + if python.PyBytes_CheckExact(string_value): result = _ElementStringResult(string_value) 
result._parent = parent result.is_attribute = is_attribute @@ -691,7 +723,7 @@ res = function(context, *args) # wrap result for XPath consumption - obj = _wrapXPathObject(res) + obj = _wrapXPathObject(res, doc, context) # prevent Python from deallocating elements handed to libxml2 context._hold(res) xpath.valuePush(ctxt, obj) Modified: lxml/trunk/src/lxml/tests/test_xpathevaluator.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_xpathevaluator.py (original) +++ lxml/trunk/src/lxml/tests/test_xpathevaluator.py Sat Oct 17 01:18:43 2009 @@ -593,8 +593,14 @@ def tag(elem): return elem.tag +def tag_or_value(elem): + return getattr(elem, 'tag', elem) + def stringTest(ctxt, s1): return "Hello "+s1 + +def stringListTest(ctxt, s1): + return ["Hello "] + list(s1) + ["!"] def floatTest(ctxt, f1): return f1+4 @@ -616,7 +622,7 @@ return st1 def resultTypesTest(ctxt): - return ["x","y"] + return [None,None] def resultTypesTest2(ctxt): return resultTypesTest @@ -624,6 +630,7 @@ uri = "http://www.example.com/" extension = {(None, 'stringTest'): stringTest, + (None, 'stringListTest'): stringListTest, (None, 'floatTest'): floatTest, (None, 'booleanTest'): booleanTest, (None, 'setTest'): setTest, @@ -655,6 +662,8 @@ ['tag'] >>> list(map(tag, e("setTest2(/body/*)"))) ['tag', 'section'] + >>> list(map(tag_or_value, e("stringListTest(/body/tag)"))) + ['Hello', 'tag', 'tag', 'tag', '!'] >>> e("argsTest1('a',1.5,true(),/body/tag)") "a, 1.5, True, ['tag', 'tag', 'tag']" >>> list(map(tag, e("argsTest2(/body/tag, /body/section)"))) @@ -662,7 +671,7 @@ >>> e("resultTypesTest()") Traceback (most recent call last): ... - XPathResultError: This is not a node: 'x' + XPathResultError: This is not a supported node-set result: None >>> try: ... e("resultTypesTest2()") ... 
except etree.XPathResultError: Modified: lxml/trunk/src/lxml/xpath.pxi ============================================================================== --- lxml/trunk/src/lxml/xpath.pxi (original) +++ lxml/trunk/src/lxml/xpath.pxi Sat Oct 17 01:18:43 2009 @@ -79,16 +79,16 @@ for name, value in variable_dict.items(): name_utf = self._to_utf(name) xpath.xmlXPathRegisterVariable( - self._xpathCtxt, _cstr(name_utf), _wrapXPathObject(value)) + self._xpathCtxt, _cstr(name_utf), _wrapXPathObject(value, None, None)) cdef registerVariable(self, name, value): name_utf = self._to_utf(name) xpath.xmlXPathRegisterVariable( - self._xpathCtxt, _cstr(name_utf), _wrapXPathObject(value)) + self._xpathCtxt, _cstr(name_utf), _wrapXPathObject(value, None, None)) cdef void _registerVariable(self, name_utf, value): xpath.xmlXPathRegisterVariable( - self._xpathCtxt, _cstr(name_utf), _wrapXPathObject(value)) + self._xpathCtxt, _cstr(name_utf), _wrapXPathObject(value, None, None)) cdef void _setupDict(self, xpath.xmlXPathContext* xpathCtxt): __GLOBAL_PARSER_CONTEXT.initXPathParserDict(xpathCtxt) From scoder at codespeak.net Sat Oct 17 01:20:52 2009 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sat, 17 Oct 2009 01:20:52 +0200 (CEST) Subject: [Lxml-checkins] r68559 - lxml/trunk Message-ID: <20091016232052.20FDC16805A@codespeak.net> Author: scoder Date: Sat Oct 17 01:20:51 2009 New Revision: 68559 Modified: lxml/trunk/ (props changed) lxml/trunk/CHANGES.txt Log: r5288 at delle: sbehnel | 2009-10-17 01:20:44 +0200 changelog Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Sat Oct 17 01:20:51 2009 @@ -8,6 +8,9 @@ Features added -------------- +* Support for strings (in addition to Elements) in node-sets returned + by extension functions. + * Forms that lack an ``action`` attribute default to the base URL of the document on submit. 
From scoder at codespeak.net Sat Oct 17 01:22:55 2009 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sat, 17 Oct 2009 01:22:55 +0200 (CEST) Subject: [Lxml-checkins] r68560 - in lxml/trunk: . src/lxml/tests Message-ID: <20091016232255.AC94F16805C@codespeak.net> Author: scoder Date: Sat Oct 17 01:22:55 2009 New Revision: 68560 Modified: lxml/trunk/ (props changed) lxml/trunk/src/lxml/tests/test_xpathevaluator.py Log: r5290 at delle: sbehnel | 2009-10-17 01:22:51 +0200 test fix Modified: lxml/trunk/src/lxml/tests/test_xpathevaluator.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_xpathevaluator.py (original) +++ lxml/trunk/src/lxml/tests/test_xpathevaluator.py Sat Oct 17 01:22:55 2009 @@ -663,7 +663,7 @@ >>> list(map(tag, e("setTest2(/body/*)"))) ['tag', 'section'] >>> list(map(tag_or_value, e("stringListTest(/body/tag)"))) - ['Hello', 'tag', 'tag', 'tag', '!'] + ['Hello ', 'tag', 'tag', 'tag', '!'] >>> e("argsTest1('a',1.5,true(),/body/tag)") "a, 1.5, True, ['tag', 'tag', 'tag']" >>> list(map(tag, e("argsTest2(/body/tag, /body/section)"))) From scoder at codespeak.net Sat Oct 17 02:11:43 2009 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sat, 17 Oct 2009 02:11:43 +0200 (CEST) Subject: [Lxml-checkins] r68563 - in lxml/trunk: . 
src/lxml/html src/lxml/html/tests Message-ID: <20091017001143.24E1016803B@codespeak.net> Author: scoder Date: Sat Oct 17 02:11:42 2009 New Revision: 68563 Modified: lxml/trunk/ (props changed) lxml/trunk/src/lxml/html/__init__.py lxml/trunk/src/lxml/html/tests/test_rewritelinks.txt Log: r5292 at delle: sbehnel | 2009-10-17 02:11:33 +0200 fix bug 449926: reverse URL iteration inside of text content to simplify replacements Modified: lxml/trunk/src/lxml/html/__init__.py ============================================================================== --- lxml/trunk/src/lxml/html/__init__.py (original) +++ lxml/trunk/src/lxml/html/__init__.py Sat Oct 17 02:11:42 2009 @@ -308,6 +308,13 @@ Note: <base href> is *not* taken into account in any way. The link you get is exactly the link in the document. + + Note: multiple links inside of a single text string or + attribute value are returned in reversed order. This makes it + possible to replace or delete them from the text string value + based on their reported text positions. Otherwise, a + modification at one text position can change the positions of + links reported later on. 
""" link_attrs = defs.link_attrs for el in self.iter(): @@ -347,15 +354,29 @@ ## http://www.w3.org/TR/html401/struct/objects.html#adef-valuetype yield (el, 'value', el.get('value'), 0) if tag == 'style' and el.text: - for match in _css_url_re.finditer(el.text): - url, start = _unquote_match(match.group(1), match.start(1)) - yield (el, None, url, start) - for match in _css_import_re.finditer(el.text): - yield (el, None, match.group(1), match.start(1)) + urls = [ + _unquote_match(match.group(1), match.start(1)) + for match in _css_url_re.finditer(el.text) + ] + [ + (match.group(1), match.start(1)) + for match in _css_import_re.finditer(el.text) + ] + if urls: + # sort by start pos to bring both match sets back into order + urls = [ (start, url) for (url, start) in urls ] + urls.sort() + # reverse the list to report correct positions despite + # modifications + urls.reverse() + for start, url in urls: + yield (el, None, url, start) if 'style' in attribs: - for match in _css_url_re.finditer(attribs['style']): - url, start = _unquote_match(match.group(1), match.start(1)) - yield (el, 'style', url, start) + urls = list(_css_url_re.finditer(attribs['style'])) + if urls: + # return in reversed order to simplify in-place modifications + for match in urls[::-1]: + url, start = _unquote_match(match.group(1), match.start(1)) + yield (el, 'style', url, start) def rewrite_links(self, link_repl_func, resolve_base_href=True, base_href=None): Modified: lxml/trunk/src/lxml/html/tests/test_rewritelinks.txt ============================================================================== --- lxml/trunk/src/lxml/html/tests/test_rewritelinks.txt (original) +++ lxml/trunk/src/lxml/html/tests/test_rewritelinks.txt Sat Oct 17 02:11:42 2009 @@ -1,3 +1,8 @@ + +Setup:: + + >>> import lxml.html + We'll define a link translation function: >>> base_href = 'http://old/base/path.html' @@ -118,8 +123,8 @@ ... ... 
''')) link href="style.css" - style None="/bg.gif"@40 style None="/other-styles.css"@69 + style None="/bg.gif"@40 script src="/js-funcs.js" a href="/test.html" a href="/other.html" @@ -179,3 +184,48 @@ + +Check if we can replace multiple links inside of the same text string:: + + >>> html = lxml.html.fromstring ("""\ + ... + ... + ... Test + ... + ... + ... + ...

Hi

+ ... + ... + ... """, + ... base_url = 'http://www.example.com/') + + >>> html.make_links_absolute () + + >>> try: _unicode = unicode + ... except NameError: _unicode = str + + >>> print(lxml.html.tostring (html, pretty_print = True, encoding=_unicode)) + + + Test + + + +

Hi

+ + From scoder at codespeak.net Sat Oct 17 03:34:13 2009 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sat, 17 Oct 2009 03:34:13 +0200 (CEST) Subject: [Lxml-checkins] r68565 - lxml/trunk Message-ID: <20091017013413.9D72816800D@codespeak.net> Author: scoder Date: Sat Oct 17 03:34:13 2009 New Revision: 68565 Modified: lxml/trunk/ (props changed) lxml/trunk/CHANGES.txt Log: r5294 at delle: sbehnel | 2009-10-17 02:19:50 +0200 changelog Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Sat Oct 17 03:34:13 2009 @@ -25,6 +25,14 @@ Bugs fixed ---------- +* Rewriting multiple links inside of HTML text content could end up + replacing unrelated content as replacements could impact the + reported position of subsequent matches. Modifications are now + simplified by letting the ``iterlinks()`` generator in ``lxml.html`` + return links in reversed order if they appear inside the same text + node. Thus, replacements and link-internal modifications no longer + change the position of links reported afterwards. + * The ``resolve_entities`` option did not work in the incremental feed parser. From scoder at codespeak.net Sat Oct 17 03:34:16 2009 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sat, 17 Oct 2009 03:34:16 +0200 (CEST) Subject: [Lxml-checkins] r68566 - in lxml/trunk: . 
src/lxml Message-ID: <20091017013416.D54D4168015@codespeak.net> Author: scoder Date: Sat Oct 17 03:34:16 2009 New Revision: 68566 Modified: lxml/trunk/ (props changed) lxml/trunk/src/lxml/extensions.pxi Log: r5295 at delle: sbehnel | 2009-10-17 03:23:53 +0200 Py3 fix Modified: lxml/trunk/src/lxml/extensions.pxi ============================================================================== --- lxml/trunk/src/lxml/extensions.pxi (original) +++ lxml/trunk/src/lxml/extensions.pxi Sat Oct 17 03:34:16 2009 @@ -714,7 +714,7 @@ doc = context._doc try: args = [] - for i in xrange(nargs): + for i in range(nargs): obj = xpath.valuePop(ctxt) o = _unwrapXPathObject(obj, doc, context._build_smart_strings) _freeXPathObject(obj) From scoder at codespeak.net Sat Oct 17 03:34:20 2009 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sat, 17 Oct 2009 03:34:20 +0200 (CEST) Subject: [Lxml-checkins] r68567 - in lxml/trunk: . src/lxml Message-ID: <20091017013420.72504168013@codespeak.net> Author: scoder Date: Sat Oct 17 03:34:19 2009 New Revision: 68567 Modified: lxml/trunk/ (props changed) lxml/trunk/CHANGES.txt lxml/trunk/src/lxml/lxml.etree.pyx lxml/trunk/src/lxml/python.pxd Log: r5296 at delle: sbehnel | 2009-10-17 03:34:05 +0200 create _Attrib instance on each .attrib request instead of keeping a cyclic reference to it (alternative weak-ref implementation is actually slower) Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Sat Oct 17 03:34:19 2009 @@ -88,6 +88,10 @@ Other changes ------------- +* ``Element.attrib`` no longer uses a cyclic reference back to its + Element object. It therefore no longer requires the garbage + collector to clean up. + * Static builds include libiconv, in addition to libxml2 and libxslt. 
Modified: lxml/trunk/src/lxml/lxml.etree.pyx ============================================================================== --- lxml/trunk/src/lxml/lxml.etree.pyx (original) +++ lxml/trunk/src/lxml/lxml.etree.pyx Sat Oct 17 03:34:19 2009 @@ -528,7 +528,6 @@ cdef _Document _doc cdef xmlNode* _c_node cdef object _tag - cdef object _attrib def _init(self): u"""_init(self) @@ -814,9 +813,15 @@ keys(), values() and items() to access element attributes. """ def __get__(self): - if self._attrib is None: - self._attrib = _Attrib(self) - return self._attrib + return _Attrib(self) + ## cdef python.PyObject* ref + ## if self._attrib is not None: + ## ref = python.PyWeakref_GET_OBJECT(self._attrib) + ## if ref is not None: + ## return ref + ## attrib = _Attrib(self) + ## self._attrib = python.PyWeakref_NewRef(attrib, NULL) + ## return attrib property text: u"""Text before the first subelement. This is either a string or @@ -1948,7 +1953,7 @@ u"""A dict-like proxy for the ``Element.attrib`` property. """ cdef _Element _element - def __init__(self, _Element element not None): + def __cinit__(self, _Element element not None): self._element = element # MANIPULATORS @@ -2064,6 +2069,12 @@ other = dict(other) return python.PyObject_RichCompare(one, other, op) + +cdef extern from "etree_defs.h": + # macro call to 't->tp_new()' for fast instantiation + cdef _Attrib NEW_ATTRIB "PY_NEW" (object t) + + cdef class _AttribIterator: u"""Attribute iterator - for internal use only! 
""" Modified: lxml/trunk/src/lxml/python.pxd ============================================================================== --- lxml/trunk/src/lxml/python.pxd (original) +++ lxml/trunk/src/lxml/python.pxd Sat Oct 17 03:34:19 2009 @@ -90,6 +90,9 @@ cdef object PyObject_RichCompare(object o1, object o2, int op) cdef int PyObject_RichCompareBool(object o1, object o2, int op) +# object PyWeakref_NewRef(object ob, PyObject* callback) +# PyObject* PyWeakref_GET_OBJECT(object ref) + cdef void* PyMem_Malloc(size_t size) cdef void* PyMem_Realloc(void* p, size_t size) cdef void PyMem_Free(void* p) From scoder at codespeak.net Fri Oct 30 14:38:03 2009 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 30 Oct 2009 14:38:03 +0100 (CET) Subject: [Lxml-checkins] r68859 - lxml/branch/lxml-2.2/doc Message-ID: <20091030133803.43671168495@codespeak.net> Author: scoder Date: Fri Oct 30 14:38:02 2009 New Revision: 68859 Modified: lxml/branch/lxml-2.2/doc/elementsoup.txt Log: doc typo Modified: lxml/branch/lxml-2.2/doc/elementsoup.txt ============================================================================== --- lxml/branch/lxml-2.2/doc/elementsoup.txt (original) +++ lxml/branch/lxml-2.2/doc/elementsoup.txt Fri Oct 30 14:38:02 2009 @@ -182,7 +182,7 @@ If you prefer a 'real' (and fast) HTML parser instead of the regular expression based one in BeautifulSoup, you can still benefit from -BeautifulSoup's _`support for encoding detection` in the +BeautifulSoup's `support for encoding detection`_ in the ``UnicodeDammit`` class. .. sourcecode:: pycon From scoder at codespeak.net Fri Oct 30 15:02:05 2009 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 30 Oct 2009 15:02:05 +0100 (CET) Subject: [Lxml-checkins] r68861 - in lxml/branch/lxml-2.2: . 
doc Message-ID: <20091030140205.93EDA1684A9@codespeak.net> Author: scoder Date: Fri Oct 30 15:02:05 2009 New Revision: 68861 Modified: lxml/branch/lxml-2.2/CHANGES.txt lxml/branch/lxml-2.2/doc/main.txt Log: prepare release of 2.2.3 Modified: lxml/branch/lxml-2.2/CHANGES.txt ============================================================================== --- lxml/branch/lxml-2.2/CHANGES.txt (original) +++ lxml/branch/lxml-2.2/CHANGES.txt Fri Oct 30 15:02:05 2009 @@ -2,7 +2,7 @@ lxml changelog ============== -2.2.3 (2009-10-??) +2.2.3 (2009-10-30) ================== Features added Modified: lxml/branch/lxml-2.2/doc/main.txt ============================================================================== --- lxml/branch/lxml-2.2/doc/main.txt (original) +++ lxml/branch/lxml-2.2/doc/main.txt Fri Oct 30 15:02:05 2009 @@ -147,8 +147,8 @@ source release. If you can't wait, consider trying a less recent release version first. -The latest version is `lxml 2.2.2`_, released 2009-06-21 -(`changes for 2.2.2`_). `Older versions`_ are listed below. +The latest version is `lxml 2.2.3`_, released 2009-10-30 +(`changes for 2.2.3`_). `Older versions`_ are listed below. Please take a look at the `installation instructions`_! @@ -221,7 +221,9 @@ `_ and the `current in-development version `_. -.. _`PDF documentation`: lxmldoc-2.2.2.pdf +.. _`PDF documentation`: lxmldoc-2.2.3.pdf + +* `lxml 2.2.2`_, released 2009-06-21 (`changes for 2.2.2`_) * `lxml 2.2.1`_, released 2009-06-02 (`changes for 2.2.1`_) @@ -323,6 +325,7 @@ * `lxml 0.5`_, released 2005-04-08 +.. _`lxml 2.2.3`: lxml-2.2.3.tgz .. _`lxml 2.2.2`: lxml-2.2.2.tgz .. _`lxml 2.2.1`: lxml-2.2.1.tgz .. _`lxml 2.2`: lxml-2.2.tgz @@ -374,6 +377,7 @@ .. _`lxml 0.5.1`: lxml-0.5.1.tgz .. _`lxml 0.5`: lxml-0.5.tgz +.. _`changes for 2.2.3`: changes-2.2.3.html .. _`changes for 2.2.2`: changes-2.2.2.html .. _`changes for 2.2.1`: changes-2.2.1.html .. 
_`changes for 2.2`: changes-2.2.html From scoder at codespeak.net Fri Oct 30 15:10:34 2009 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 30 Oct 2009 15:10:34 +0100 (CET) Subject: [Lxml-checkins] r68862 - lxml/branch/lxml-2.2 Message-ID: <20091030141034.387BA1684AB@codespeak.net> Author: scoder Date: Fri Oct 30 15:10:33 2009 New Revision: 68862 Modified: lxml/branch/lxml-2.2/CHANGES.txt Log: typos Modified: lxml/branch/lxml-2.2/CHANGES.txt ============================================================================== --- lxml/branch/lxml-2.2/CHANGES.txt (original) +++ lxml/branch/lxml-2.2/CHANGES.txt Fri Oct 30 15:10:33 2009 @@ -23,8 +23,9 @@ * Modifying trees that contain parsed entity references could result in an infinite loop. -* ObjectifiedElement.__setattr__ created an empty-string child element when the - attribute value was rejected as a non-unicode/non-ascii string +* ``ObjectifiedElement.__setattr__`` created an empty-string child + element when the attribute value was rejected as a + non-unicode/non-ascii string * Syntax errors in ``lxml.cssselect`` could result in misleading error messages. @@ -50,7 +51,7 @@ * The ``ElementMaker`` in lxml.objectify no longer defines the default namespaces when annotation is disabled. -* Feed parser failed to honout the 'recover' option on parse errors. +* Feed parser failed to honour the 'recover' option on parse errors. * Diverting the error logging to Python's logging system was broken. 
From scoder at codespeak.net Fri Oct 30 15:11:35 2009 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 30 Oct 2009 15:11:35 +0100 (CET) Subject: [Lxml-checkins] r68863 - lxml/tag/lxml-2.2.3 Message-ID: <20091030141135.DF0E31684A9@codespeak.net> Author: scoder Date: Fri Oct 30 15:11:35 2009 New Revision: 68863 Added: lxml/tag/lxml-2.2.3/ - copied from r68862, lxml/branch/lxml-2.2/ Log: tag for 2.2.3 From scoder at codespeak.net Fri Oct 30 15:13:09 2009 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 30 Oct 2009 15:13:09 +0100 (CET) Subject: [Lxml-checkins] r68864 - in lxml/trunk: . src/lxml Message-ID: <20091030141309.D21B81684A9@codespeak.net> Author: scoder Date: Fri Oct 30 15:13:09 2009 New Revision: 68864 Modified: lxml/trunk/ (props changed) lxml/trunk/src/lxml/lxml.etree.pyx Log: r5300 at delle: sbehnel | 2009-10-17 09:33:41 +0200 work-around for Py3.1 Modified: lxml/trunk/src/lxml/lxml.etree.pyx ============================================================================== --- lxml/trunk/src/lxml/lxml.etree.pyx (original) +++ lxml/trunk/src/lxml/lxml.etree.pyx Fri Oct 30 15:13:09 2009 @@ -985,7 +985,7 @@ return _countElements(self._c_node.children) def __nonzero__(self): - u"__nonzero__(self)" + #u"__nonzero__(self)" # currently fails in Py3.1 import warnings warnings.warn( u"The behavior of this method will change in future versions. " From scoder at codespeak.net Fri Oct 30 15:13:14 2009 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 30 Oct 2009 15:13:14 +0100 (CET) Subject: [Lxml-checkins] r68865 - in lxml/trunk: . 
doc Message-ID: <20091030141314.C27111684A9@codespeak.net> Author: scoder Date: Fri Oct 30 15:13:13 2009 New Revision: 68865 Modified: lxml/trunk/ (props changed) lxml/trunk/CHANGES.txt lxml/trunk/doc/elementsoup.txt lxml/trunk/doc/main.txt Log: r5301 at delle: sbehnel | 2009-10-30 15:12:46 +0100 integrated 2.2 branch release changes Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Fri Oct 30 15:13:13 2009 @@ -33,14 +33,37 @@ node. Thus, replacements and link-internal modifications no longer change the position of links reported afterwards. -* The ``resolve_entities`` option did not work in the incremental feed - parser. - * The ``.value`` attribute of ``textarea`` elements in lxml.html did not represent the complete raw value (including child tags etc.). It now serialises the complete content on read and replaces the complete content by a string on write. +* Target parser didn't call ``.close()`` on the target object if + parsing failed. Now it is guaranteed that ``.close()`` will be + called after parsing, regardless of the outcome. + +Other changes +------------- + +* ``Element.attrib`` no longer uses a cyclic reference back to its + Element object. It therefore no longer requires the garbage + collector to clean up. + +* Static builds include libiconv, in addition to libxml2 and libxslt. + + +2.2.3 (2009-10-30) +================== + +Features added +-------------- + +Bugs fixed +---------- + +* The ``resolve_entities`` option did not work in the incremental feed + parser. + * Looking up and deleting attributes without a namespace could hit a namespaced attribute of the same name instead. @@ -79,21 +102,11 @@ * Feed parser failed to honout the 'recover' option on parse errors. -* Target parser didn't call ``.close()`` on the target object if - parsing failed. 
Now it is guaranteed that ``.close()`` will be - called after parsing, regardless of the outcome. - * Diverting the error logging to Python's logging system was broken. Other changes ------------- -* ``Element.attrib`` no longer uses a cyclic reference back to its - Element object. It therefore no longer requires the garbage - collector to clean up. - -* Static builds include libiconv, in addition to libxml2 and libxslt. - 2.2.2 (2009-06-21) ================== Modified: lxml/trunk/doc/elementsoup.txt ============================================================================== --- lxml/trunk/doc/elementsoup.txt (original) +++ lxml/trunk/doc/elementsoup.txt Fri Oct 30 15:13:13 2009 @@ -182,7 +182,7 @@ If you prefer a 'real' (and fast) HTML parser instead of the regular expression based one in BeautifulSoup, you can still benefit from -BeautifulSoup's _`support for encoding detection` in the +BeautifulSoup's `support for encoding detection`_ in the ``UnicodeDammit`` class. .. sourcecode:: pycon Modified: lxml/trunk/doc/main.txt ============================================================================== --- lxml/trunk/doc/main.txt (original) +++ lxml/trunk/doc/main.txt Fri Oct 30 15:13:13 2009 @@ -221,7 +221,11 @@ `_ and the `current in-development version `_. -.. _`PDF documentation`: lxmldoc-2.2.2.pdf +.. _`PDF documentation`: lxmldoc-2.2.3.pdf + +* `lxml 2.2.3`_, released 2009-10-30 (`changes for 2.2.3`_) + +* `lxml 2.2.2`_, released 2009-06-21 (`changes for 2.2.2`_) * `lxml 2.2.1`_, released 2009-06-02 (`changes for 2.2.1`_) @@ -323,6 +327,7 @@ * `lxml 0.5`_, released 2005-04-08 +.. _`lxml 2.2.3`: lxml-2.2.3.tgz .. _`lxml 2.2.2`: lxml-2.2.2.tgz .. _`lxml 2.2.1`: lxml-2.2.1.tgz .. _`lxml 2.2`: lxml-2.2.tgz @@ -374,6 +379,7 @@ .. _`lxml 0.5.1`: lxml-0.5.1.tgz .. _`lxml 0.5`: lxml-0.5.tgz +.. _`changes for 2.2.3`: changes-2.2.3.html .. _`changes for 2.2.2`: changes-2.2.2.html .. _`changes for 2.2.1`: changes-2.2.1.html .. _`changes for 2.2`: changes-2.2.html