From scoder at codespeak.net Fri Dec 10 02:40:57 2010 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 10 Dec 2010 02:40:57 +0100 (CET) Subject: [Lxml-checkins] r79946 - lxml/trunk/src/lxml Message-ID: <20101210014057.9F3DF282B9D@codespeak.net> Author: scoder Date: Fri Dec 10 02:40:53 2010 New Revision: 79946 Modified: lxml/trunk/src/lxml/lxml.objectify.pyx Log: removed unused import Modified: lxml/trunk/src/lxml/lxml.objectify.pyx ============================================================================== --- lxml/trunk/src/lxml/lxml.objectify.pyx (original) +++ lxml/trunk/src/lxml/lxml.objectify.pyx Fri Dec 10 02:40:53 2010 @@ -34,9 +34,6 @@ cdef tuple IGNORABLE_ERRORS = (ValueError, TypeError) cdef object is_special_method = re.compile(u'__.*__$').match -cdef object islice -from itertools import islice - cdef object _typename(object t): cdef char* c_name cdef char* s From scoder at codespeak.net Fri Dec 10 02:40:59 2010 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 10 Dec 2010 02:40:59 +0100 (CET) Subject: [Lxml-checkins] r79947 - lxml/trunk/src/lxml Message-ID: <20101210014059.52F04282B9D@codespeak.net> Author: scoder Date: Fri Dec 10 02:40:57 2010 New Revision: 79947 Modified: lxml/trunk/src/lxml/apihelpers.pxi Log: fix ticket 683069: detect encoding declaration in unicode parser input Modified: lxml/trunk/src/lxml/apihelpers.pxi ============================================================================== --- lxml/trunk/src/lxml/apihelpers.pxi (original) +++ lxml/trunk/src/lxml/apihelpers.pxi Fri Dec 10 02:40:57 2010 @@ -624,7 +624,7 @@ cdef object __RE_XML_ENCODING __RE_XML_ENCODING = re.compile( - ur'^(\s*<\?\s*xml[^>]+)\s+encoding\s*=\s*"[^"]*"\s*', re.U) + ur'^(\s*<\?\s*xml[^>]+)\s+encoding\s*=\s*["\'][^"\']*["\']\s*', re.U) cdef object __REPLACE_XML_ENCODING __REPLACE_XML_ENCODING = __RE_XML_ENCODING.sub From scoder at codespeak.net Sun Dec 26 21:27:30 2010 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sun, 26 Dec 2010 21:27:30 +0100 (CET) Subject: [Lxml-checkins] r80099 - lxml/trunk/src/lxml Message-ID: <20101226202730.E98B0282BA1@codespeak.net> Author: scoder Date: Sun Dec 26 21:27:26 2010 New Revision: 80099 Modified: lxml/trunk/src/lxml/lxml.etree.pyx Log: predefine 'xml' prefix for XML namespace Modified: lxml/trunk/src/lxml/lxml.etree.pyx ============================================================================== --- lxml/trunk/src/lxml/lxml.etree.pyx (original) +++ lxml/trunk/src/lxml/lxml.etree.pyx Sun Dec 26 21:27:26 2010 @@ -127,6 +127,7 @@ # set up some default namespace prefixes cdef object _DEFAULT_NAMESPACE_PREFIXES _DEFAULT_NAMESPACE_PREFIXES = { + b"http://www.w3.org/XML/1998/namespace": b'xml', b"http://www.w3.org/1999/xhtml": b"html", b"http://www.w3.org/1999/XSL/Transform": b"xsl", b"http://www.w3.org/1999/02/22-rdf-syntax-ns#": b"rdf", From scoder at codespeak.net Sun Dec 26 21:27:33 2010 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sun, 26 Dec 2010 21:27:33 +0100 (CET) Subject: [Lxml-checkins] r80100 - lxml/trunk/src/lxml Message-ID: <20101226202733.EC563282BA1@codespeak.net> Author: scoder Date: Sun Dec 26 21:27:31 2010 New Revision: 80100 Modified: lxml/trunk/src/lxml/apihelpers.pxi Log: typing fixes Modified: lxml/trunk/src/lxml/apihelpers.pxi ============================================================================== --- lxml/trunk/src/lxml/apihelpers.pxi (original) +++ lxml/trunk/src/lxml/apihelpers.pxi Sun Dec 26 21:27:31 2010 @@ -636,7 +636,7 @@ # this is a hack to remove the XML encoding declaration from unicode return __REPLACE_XML_ENCODING(ur'\g<1>', xml_string) -cdef int _hasEncodingDeclaration(object xml_string): +cdef bint _hasEncodingDeclaration(object xml_string): # check if a (unicode) string has an XML encoding declaration return __HAS_XML_ENCODING(xml_string) is not None @@ -652,10 +652,10 @@ xml_string = xml_string[i:] return xml_string -cdef inline int _hasText(xmlNode* c_node): +cdef inline bint _hasText(xmlNode* c_node): return c_node is not NULL and _textNodeOrSkip(c_node.children) is not NULL -cdef inline int _hasTail(xmlNode* c_node): +cdef inline bint _hasTail(xmlNode* c_node): return c_node is not NULL and _textNodeOrSkip(c_node.next) is not NULL cdef _collectText(xmlNode* c_node): From scoder at codespeak.net Sun Dec 26 21:27:41 2010 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sun, 26 Dec 2010 21:27:41 +0100 (CET) Subject: [Lxml-checkins] r80101 - lxml/trunk/src/lxml/tests Message-ID: <20101226202741.139CF282BA1@codespeak.net> Author: scoder Date: Sun Dec 26 21:27:39 2010 New Revision: 80101 Modified: lxml/trunk/src/lxml/tests/common_imports.py Log: minor test code cleanup Modified: lxml/trunk/src/lxml/tests/common_imports.py ============================================================================== --- lxml/trunk/src/lxml/tests/common_imports.py (original) +++ lxml/trunk/src/lxml/tests/common_imports.py Sun Dec 26 21:27:39 2010 @@ -218,8 +218,7 @@ return os.path.join(_testdir, name) def canonicalize(xml): - f = BytesIO(xml) - tree = etree.parse(f) + tree = etree.parse(BytesIO(xml)) f = BytesIO() tree.write_c14n(f) return f.getvalue() From scoder at codespeak.net Wed Dec 29 19:59:13 2010 From: scoder at codespeak.net (scoder at codespeak.net) Date: Wed, 29 Dec 2010 19:59:13 +0100 (CET) Subject: [Lxml-checkins] r80134 - lxml/trunk/src/lxml/tests Message-ID: <20101229185913.B92EC282BEB@codespeak.net> Author: scoder Date: Wed Dec 29 19:59:10 2010 New Revision: 80134 Modified: lxml/trunk/src/lxml/tests/common_imports.py Log: fix test resource leak Modified: lxml/trunk/src/lxml/tests/common_imports.py ============================================================================== --- lxml/trunk/src/lxml/tests/common_imports.py (original) +++ lxml/trunk/src/lxml/tests/common_imports.py Wed Dec 29 19:59:10 2010 @@ -94,7 +94,11 @@ _fix_exceptions = re.compile(r'(.*except [^(]*),\s*(.*:)').sub def make_doctest(filename): filename = _get_caller_relative_path(filename) - doctests = open(filename).read() + f = open(filename) + try: + doctests = f.read() + finally: + f.close() doctests = _fix_unicode(r'\1\2', doctests) doctests = _fix_exceptions(r'\1 as \2', doctests) return doctest.DocTestCase( @@ -115,7 +119,11 @@ _fix_bytes = re.compile(r'(\s+)b(["\'])').sub def make_doctest(filename): filename = _get_caller_relative_path(filename) - doctests = open(filename).read() + f = open(filename) + try: + doctests = f.read() + finally: + f.close() doctests = _fix_traceback(r'\1\2', doctests) doctests = _fix_exceptions(r'\1, \2', doctests) doctests = _fix_bytes(r'\1\2', doctests) From scoder at codespeak.net Wed Dec 29 19:59:15 2010 From: scoder at codespeak.net (scoder at codespeak.net) Date: Wed, 29 Dec 2010 19:59:15 +0100 (CET) Subject: [Lxml-checkins] r80135 - lxml/trunk/src/lxml/tests Message-ID: <20101229185915.76D45282BEB@codespeak.net> Author: scoder Date: Wed Dec 29 19:59:13 2010 New Revision: 80135 Modified: lxml/trunk/src/lxml/tests/test_etree.py Log: test fixes Modified: lxml/trunk/src/lxml/tests/test_etree.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_etree.py (original) +++ lxml/trunk/src/lxml/tests/test_etree.py Wed Dec 29 19:59:13 2010 @@ -3025,8 +3025,10 @@ try: tree.write_c14n(filename) f = open(filename, 'rb') - data = f.read() - f.close() + try: + data = f.read() + finally: + f.close() finally: os.close(handle) os.remove(filename) @@ -3039,8 +3041,10 @@ try: tree.write_c14n(filename, compression=9) f = gzip.open(filename, 'rb') - data = f.read() - f.close() + try: + data = f.read() + finally: + f.close() finally: os.close(handle) os.remove(filename) From scoder at codespeak.net Wed Dec 29 19:59:18 2010 From: scoder at codespeak.net (scoder at codespeak.net) Date: Wed, 29 Dec 2010 19:59:18 +0100 (CET) Subject: [Lxml-checkins] r80136 - lxml/trunk/src/lxml/tests Message-ID: <20101229185918.CF782282BEB@codespeak.net> Author: scoder Date: Wed Dec 29 19:59:17 2010 New Revision: 80136 Modified: lxml/trunk/src/lxml/tests/common_imports.py lxml/trunk/src/lxml/tests/test_elementtree.py Log: test fix Modified: lxml/trunk/src/lxml/tests/common_imports.py ============================================================================== --- lxml/trunk/src/lxml/tests/common_imports.py (original) +++ lxml/trunk/src/lxml/tests/common_imports.py Wed Dec 29 19:59:17 2010 @@ -78,7 +78,7 @@ if sys.version_info[0] >= 3: # Python 3 - unicode = str + from builtins import str as unicode def _str(s, encoding="UTF-8"): return s def _bytes(s, encoding="UTF-8"): @@ -106,6 +106,7 @@ doctests, {}, os.path.basename(filename), filename, 0)) else: # Python 2 + from __builtin__ import unicode def _str(s, encoding="UTF-8"): return unicode(s, encoding=encoding) def _bytes(s, encoding="UTF-8"): Modified: lxml/trunk/src/lxml/tests/test_elementtree.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_elementtree.py (original) +++ lxml/trunk/src/lxml/tests/test_elementtree.py Wed Dec 29 19:59:17 2010 @@ -18,7 +18,7 @@ from common_imports import StringIO, BytesIO, etree from common_imports import ElementTree, cElementTree, ET_VERSION, CET_VERSION from common_imports import filter_by_version, fileInTestDir, canonicalize, HelperTestCase -from common_imports import _str, _bytes +from common_imports import _str, _bytes, unicode if cElementTree is not None and CET_VERSION <= (1,0,7): cElementTree = None @@ -3638,8 +3638,6 @@ """Write out element for comparison. """ data = self.etree.tostring(element, encoding=encoding) - if encoding != 'us-ascii': - data = data.decode(encoding) return canonicalize(data) def _writeElementFile(self, element, encoding='us-ascii'): @@ -3658,8 +3656,6 @@ finally: os.close(handle) os.remove(filename) - if encoding != 'us-ascii': - data = data.decode(encoding) return canonicalize(data) def assertXML(self, expected, element, encoding='us-ascii'): @@ -3667,6 +3663,8 @@ Does this two ways; once using BytesIO, once using a real file. """ + if isinstance(expected, unicode): + expected = expected.encode(encoding) self.assertEquals(expected, self._writeElement(element, encoding)) self.assertEquals(expected, self._writeElementFile(element, encoding)) From scoder at codespeak.net Wed Dec 29 19:59:21 2010 From: scoder at codespeak.net (scoder at codespeak.net) Date: Wed, 29 Dec 2010 19:59:21 +0100 (CET) Subject: [Lxml-checkins] r80137 - lxml/trunk Message-ID: <20101229185921.CF989282BF4@codespeak.net> Author: scoder Date: Wed Dec 29 19:59:20 2010 New Revision: 80137 Modified: lxml/trunk/setupinfo.py Log: fix resource leak during build Modified: lxml/trunk/setupinfo.py ============================================================================== --- lxml/trunk/setupinfo.py (original) +++ lxml/trunk/setupinfo.py Wed Dec 29 19:59:20 2010 @@ -238,19 +238,21 @@ import subprocess except ImportError: # Python 2.3 - _, rf, ef = os.popen3(cmd) + sf, rf, ef = os.popen3(cmd) + sf.close() + errors = ef.read() + stdout_data = rf.read() else: # Python 2.4+ p = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) - rf, ef = p.stdout, p.stderr - errors = ef.read() + stdout_data, errors = p.communicate() global _ERROR_PRINTED if errors and not _ERROR_PRINTED: _ERROR_PRINTED = True print("ERROR: %s" % errors) print("** make sure the development packages of libxml2 and libxslt are installed **\n") - return decode_input(rf.read()).strip() + return decode_input(stdout_data).strip() def get_library_versions(): xml2_version = run_command(find_xml2_config(), "--version") From scoder at codespeak.net Wed Dec 29 19:59:24 2010 From: scoder at codespeak.net (scoder at codespeak.net) Date: Wed, 29 Dec 2010 19:59:24 +0100 (CET) Subject: [Lxml-checkins] r80138 - lxml/trunk/doc Message-ID: <20101229185924.EBC3B282BF4@codespeak.net> Author: scoder Date: Wed Dec 29 19:59:23 2010 New Revision: 80138 Modified: lxml/trunk/doc/build.txt Log: require Cython 0.14.1 Modified: lxml/trunk/doc/build.txt ============================================================================== --- lxml/trunk/doc/build.txt (original) +++ lxml/trunk/doc/build.txt Wed Dec 29 19:59:23 2010 @@ -46,9 +46,9 @@ you want to be an lxml developer, then you do need a working Cython installation. You can use EasyInstall_ to install it:: - easy_install "Cython>=0.13" + easy_install "Cython>=0.14.1" -lxml currently requires Cython 0.13, later release versions should +lxml currently requires Cython 0.14.1, later release versions should work as well. From scoder at codespeak.net Wed Dec 29 19:59:29 2010 From: scoder at codespeak.net (scoder at codespeak.net) Date: Wed, 29 Dec 2010 19:59:29 +0100 (CET) Subject: [Lxml-checkins] r80139 - lxml/trunk/src/lxml Message-ID: <20101229185929.6D3AB282BEB@codespeak.net> Author: scoder Date: Wed Dec 29 19:59:27 2010 New Revision: 80139 Modified: lxml/trunk/src/lxml/apihelpers.pxi lxml/trunk/src/lxml/extensions.pxi lxml/trunk/src/lxml/lxml.etree.pyx lxml/trunk/src/lxml/lxml.objectify.pyx lxml/trunk/src/lxml/python.pxd lxml/trunk/src/lxml/readonlytree.pxi Log: code cleanup: drop clumsy redeclaration of builtin types known in Cython 0.14 Modified: lxml/trunk/src/lxml/apihelpers.pxi ============================================================================== --- lxml/trunk/src/lxml/apihelpers.pxi (original) +++ lxml/trunk/src/lxml/apihelpers.pxi Wed Dec 29 19:59:27 2010 @@ -770,7 +770,7 @@ return count cdef int _findChildSlice( - python.slice sliceobject, xmlNode* c_parent, + slice sliceobject, xmlNode* c_parent, xmlNode** c_start_node, Py_ssize_t* c_step, Py_ssize_t* c_length) except -1: u"""Resolve a children slice. @@ -795,7 +795,7 @@ c_start_node[0] = _findChild(c_parent, start) return 0 -cdef bint _isFullSlice(python.slice sliceobject): +cdef bint _isFullSlice(slice sliceobject): u"""Conservative guess if this slice is a full slice as in ``s[:]``. """ cdef Py_ssize_t step Modified: lxml/trunk/src/lxml/extensions.pxi ============================================================================== --- lxml/trunk/src/lxml/extensions.pxi (original) +++ lxml/trunk/src/lxml/extensions.pxi Wed Dec 29 19:59:27 2010 @@ -651,7 +651,7 @@ ################################################################################ # special str/unicode subclasses -cdef class _ElementUnicodeResult(python.unicode): +cdef class _ElementUnicodeResult(unicode): cdef _Element _parent cdef readonly object is_tail cdef readonly object is_text @@ -661,8 +661,8 @@ def getparent(self): return self._parent -class _ElementStringResult(str): - # we need to use a Python class here, str cannot be C-subclassed +class _ElementStringResult(bytes): + # we need to use a Python class here, bytes cannot be C-subclassed # in Pyrex/Cython def getparent(self): return self._parent Modified: lxml/trunk/src/lxml/lxml.etree.pyx ============================================================================== --- lxml/trunk/src/lxml/lxml.etree.pyx (original) +++ lxml/trunk/src/lxml/lxml.etree.pyx Wed Dec 29 19:59:27 2010 @@ -612,7 +612,7 @@ raise ValueError, u"cannot assign None" if python.PySlice_Check(x): # slice assignment - _findChildSlice(x, self._c_node, &c_node, &step, &slicelength) + _findChildSlice(x, self._c_node, &c_node, &step, &slicelength) if step > 0: left_to_right = 1 else: @@ -647,7 +647,7 @@ _assertValidNode(self) if python.PySlice_Check(x): # slice deletion - if _isFullSlice(x): + if _isFullSlice(x): c_node = self._c_node.children if c_node is not NULL: if not _isElement(c_node): @@ -657,7 +657,7 @@ _removeNode(self._doc, c_node) c_node = c_next else: - _findChildSlice(x, self._c_node, &c_node, &step, &slicelength) + _findChildSlice(x, self._c_node, &c_node, &step, &slicelength) _deleteSlice(self._doc, c_node, slicelength, step) else: # item deletion @@ -1042,9 +1042,9 @@ _assertValidNode(self) if python.PySlice_Check(x): # slicing - if _isFullSlice(x): + if _isFullSlice(x): return _collectChildren(self) - _findChildSlice(x, self._c_node, &c_node, &step, &slicelength) + _findChildSlice(x, self._c_node, &c_node, &step, &slicelength) if c_node is NULL: return [] if step > 0: Modified: lxml/trunk/src/lxml/lxml.objectify.pyx ============================================================================== --- lxml/trunk/src/lxml/lxml.objectify.pyx (original) +++ lxml/trunk/src/lxml/lxml.objectify.pyx Wed Dec 29 19:59:27 2010 @@ -557,19 +557,19 @@ element._c_node, _PYTYPE_NAMESPACE, _PYTYPE_ATTRIBUTE_NAME) cetree.setNodeText(element._c_node, value) -cdef _setSlice(slice, _Element target, items): +cdef _setSlice(sliceobject, _Element target, items): cdef _Element parent cdef tree.xmlNode* c_node cdef Py_ssize_t c_step, c_start, pos cdef list new_items # collect existing slice - if (slice).step is None: + if (sliceobject).step is None: c_step = 1 else: - c_step = (slice).step + c_step = (sliceobject).step if c_step == 0: raise ValueError, u"Invalid slice" - del_items = target[slice] + del_items = target[sliceobject] # collect new values new_items = [] @@ -614,13 +614,13 @@ if pos > 0: item = new_items[pos-1] else: - if (slice).start > 0: + if (sliceobject).start > 0: c_node = parent._c_node.children else: c_node = parent._c_node.last c_node = _findFollowingSibling( c_node, tree._getNs(target._c_node), target._c_node.name, - (slice).start - 1) + (sliceobject).start - 1) if c_node is NULL: while pos < python.PyList_GET_SIZE(new_items): cetree.appendChild(parent, new_items[pos]) Modified: lxml/trunk/src/lxml/python.pxd ============================================================================== --- lxml/trunk/src/lxml/python.pxd (original) +++ lxml/trunk/src/lxml/python.pxd Wed Dec 29 19:59:27 2010 @@ -12,14 +12,6 @@ cdef void Py_DECREF(object o) cdef void Py_XDECREF(PyObject* o) - ctypedef class __builtin__.slice [object PySliceObject]: - cdef object start - cdef object stop - cdef object step - - ctypedef class __builtin__.unicode [object PyUnicodeObject]: - pass - cdef FILE* PyFile_AsFile(object p) cdef bint PyUnicode_Check(object obj) Modified: lxml/trunk/src/lxml/readonlytree.pxi ============================================================================== --- lxml/trunk/src/lxml/readonlytree.pxi (original) +++ lxml/trunk/src/lxml/readonlytree.pxi Wed Dec 29 19:59:27 2010 @@ -109,9 +109,9 @@ self._assertNode() if python.PySlice_Check(x): # slicing - if _isFullSlice(x): + if _isFullSlice(x): return _collectChildren(self) - _findChildSlice(x, self._c_node, &c_node, &step, &slicelength) + _findChildSlice(x, self._c_node, &c_node, &step, &slicelength) if c_node is NULL: return [] if step > 0: From scoder at codespeak.net Wed Dec 29 19:59:32 2010 From: scoder at codespeak.net (scoder at codespeak.net) Date: Wed, 29 Dec 2010 19:59:32 +0100 (CET) Subject: [Lxml-checkins] r80140 - lxml/trunk/src/lxml/tests Message-ID: <20101229185932.659DB282BEB@codespeak.net> Author: scoder Date: Wed Dec 29 19:59:30 2010 New Revision: 80140 Modified: lxml/trunk/src/lxml/tests/common_imports.py Log: minor test code cleanup Modified: lxml/trunk/src/lxml/tests/common_imports.py ============================================================================== --- lxml/trunk/src/lxml/tests/common_imports.py (original) +++ lxml/trunk/src/lxml/tests/common_imports.py Wed Dec 29 19:59:30 2010 @@ -94,11 +94,7 @@ _fix_exceptions = re.compile(r'(.*except [^(]*),\s*(.*:)').sub def make_doctest(filename): filename = _get_caller_relative_path(filename) - f = open(filename) - try: - doctests = f.read() - finally: - f.close() + doctests = read_file(filename) doctests = _fix_unicode(r'\1\2', doctests) doctests = _fix_exceptions(r'\1 as \2', doctests) return doctest.DocTestCase( @@ -120,11 +116,7 @@ _fix_bytes = re.compile(r'(\s+)b(["\'])').sub def make_doctest(filename): filename = _get_caller_relative_path(filename) - f = open(filename) - try: - doctests = f.read() - finally: - f.close() + doctests = read_file(filename) doctests = _fix_traceback(r'\1\2', doctests) doctests = _fix_exceptions(r'\1, \2', doctests) doctests = _fix_bytes(r'\1\2', doctests) @@ -226,6 +218,17 @@ _testdir = os.path.dirname(__file__) return os.path.join(_testdir, name) +def read_file(name, mode='r'): + f = open(name, mode) + try: + data = f.read() + finally: + f.close() + return data + +def readFileInTestDir(name, mode='r'): + return read_file(fileInTestDir(name), mode) + def canonicalize(xml): tree = etree.parse(BytesIO(xml)) f = BytesIO() From scoder at codespeak.net Wed Dec 29 19:59:35 2010 From: scoder at codespeak.net (scoder at codespeak.net) Date: Wed, 29 Dec 2010 19:59:35 +0100 (CET) Subject: [Lxml-checkins] r80141 - lxml/trunk Message-ID: <20101229185935.4D675282BEB@codespeak.net> Author: scoder Date: Wed Dec 29 19:59:33 2010 New Revision: 80141 Modified: lxml/trunk/versioninfo.py Log: support newer SVN version in build Modified: lxml/trunk/versioninfo.py ============================================================================== --- lxml/trunk/versioninfo.py (original) +++ lxml/trunk/versioninfo.py Wed Dec 29 19:59:33 2010 @@ -33,7 +33,7 @@ data = f.read() f.close() - if data[:1] in ('8', '9'): + if data[:1] in ('8', '9') or data[:2] == '10': # SVN >= 1.4 data = [ d.splitlines() for d in data.split('\n\x0c\n') ] del data[0][0] # get rid of the '8' From scoder at codespeak.net Wed Dec 29 19:59:37 2010 From: scoder at codespeak.net (scoder at codespeak.net) Date: Wed, 29 Dec 2010 19:59:37 +0100 (CET) Subject: [Lxml-checkins] r80142 - lxml/trunk Message-ID: <20101229185937.05145282BF0@codespeak.net> Author: scoder Date: Wed Dec 29 19:59:36 2010 New Revision: 80142 Modified: lxml/trunk/versioninfo.py Log: fix resource leak during build Modified: lxml/trunk/versioninfo.py ============================================================================== --- lxml/trunk/versioninfo.py (original) +++ lxml/trunk/versioninfo.py Wed Dec 29 19:59:36 2010 @@ -5,7 +5,11 @@ def version(): global __LXML_VERSION if __LXML_VERSION is None: - __LXML_VERSION = open(os.path.join(get_base_dir(), 'version.txt')).read().strip() + f = open(os.path.join(get_base_dir(), 'version.txt')) + try: + __LXML_VERSION = f.read().strip() + finally: + f.close() return __LXML_VERSION def branch_version(): From scoder at codespeak.net Wed Dec 29 19:59:41 2010 From: scoder at codespeak.net (scoder at codespeak.net) Date: Wed, 29 Dec 2010 19:59:41 +0100 (CET) Subject: [Lxml-checkins] r80143 - lxml/trunk/src/lxml/tests Message-ID: <20101229185941.4E64F282BEB@codespeak.net> Author: scoder Date: Wed Dec 29 19:59:39 2010 New Revision: 80143 Modified: lxml/trunk/src/lxml/tests/test_etree.py Log: fix test resource leaks Modified: lxml/trunk/src/lxml/tests/test_etree.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_etree.py (original) +++ lxml/trunk/src/lxml/tests/test_etree.py Wed Dec 29 19:59:39 2010 @@ -20,7 +20,7 @@ if this_dir not in sys.path: sys.path.insert(0, this_dir) # needed for Py3 -from common_imports import etree, StringIO, BytesIO, HelperTestCase, fileInTestDir +from common_imports import etree, StringIO, BytesIO, HelperTestCase, fileInTestDir, read_file from common_imports import SillyFileLike, LargeFileLikeUnicode, doctest, make_doctest from common_imports import canonicalize, sorted, _str, _bytes @@ -2941,7 +2941,7 @@ ''' % filename)) old_text = root.text - content = open(filename).read() + content = read_file(filename) old_tail = root[0].tail self.include( etree.ElementTree(root) ) @@ -2962,7 +2962,7 @@ def test_xinclude_resolver(self): class res(etree.Resolver): - include_text = open(fileInTestDir('test.xml')).read() + include_text = read_file(fileInTestDir('test.xml')) called = {} def resolve(self, url, id, context): if url.endswith(".dtd"): @@ -3015,7 +3015,11 @@ tree = self.parse(_bytes(''+''*200+'')) f = BytesIO() tree.write_c14n(f, compression=9) - s = gzip.GzipFile(fileobj=BytesIO(f.getvalue())).read() + gzfile = gzip.GzipFile(fileobj=BytesIO(f.getvalue())) + try: + s = gzfile.read() + finally: + gzfile.close() self.assertEquals(_bytes(''+''*200+''), s) @@ -3024,11 +3028,7 @@ handle, filename = tempfile.mkstemp() try: tree.write_c14n(filename) - f = open(filename, 'rb') - try: - data = f.read() - finally: - f.close() + data = read_file(filename, 'rb') finally: os.close(handle) os.remove(filename) @@ -3159,7 +3159,11 @@ tree = self.parse(_bytes(''+''*200+'')) f = BytesIO() tree.write(f, compression=9) - s = gzip.GzipFile(fileobj=BytesIO(f.getvalue())).read() + gzfile = gzip.GzipFile(fileobj=BytesIO(f.getvalue())) + try: + s = gzfile.read() + finally: + gzfile.close() self.assertEquals(_bytes(''+''*200+''), s) @@ -3177,13 +3181,21 @@ tree.write(f, compression=1) s = f.getvalue() self.assert_(len(s) <= len(s0)) - s1 = gzip.GzipFile(fileobj=BytesIO(s)).read() + gzfile = gzip.GzipFile(fileobj=BytesIO(s)) + try: + s1 = gzfile.read() + finally: + gzfile.close() f = BytesIO() tree.write(f, compression=9) s = f.getvalue() self.assert_(len(s) <= len(s0)) - s9 = gzip.GzipFile(fileobj=BytesIO(s)).read() + gzfile = gzip.GzipFile(fileobj=BytesIO(s)) + try: + s9 = gzfile.read() + finally: + gzfile.close() self.assertEquals(_bytes(''+''*200+''), s0) @@ -3197,9 +3209,7 @@ handle, filename = tempfile.mkstemp() try: tree.write(filename) - f = open(filename, 'rb') - data = f.read() - f.close() + data = read_file(filename, 'rb') finally: os.close(handle) os.remove(filename) @@ -3212,8 +3222,10 @@ try: tree.write(filename, compression=9) f = gzip.open(filename, 'rb') - data = f.read() - f.close() + try: + data = f.read() + finally: + f.close() finally: os.close(handle) os.remove(filename) From scoder at codespeak.net Wed Dec 29 19:59:44 2010 From: scoder at codespeak.net (scoder at codespeak.net) Date: Wed, 29 Dec 2010 19:59:44 +0100 (CET) Subject: [Lxml-checkins] r80144 - in lxml/trunk: . src/lxml Message-ID: <20101229185944.68C5E282BEB@codespeak.net> Author: scoder Date: Wed Dec 29 19:59:43 2010 New Revision: 80144 Modified: lxml/trunk/CHANGES.txt lxml/trunk/src/lxml/iterparse.pxi Log: make iterparse() close files immediately after parsing if it opened them itself Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Wed Dec 29 19:59:43 2010 @@ -14,6 +14,9 @@ Bugs fixed ---------- +* When finished parsing, ``iterparse()`` immediately closes files that + it has opened itself. + * Work-around for libxml2 bug that can leave the HTML parser in a non-functional state after parsing a severly broken document. Modified: lxml/trunk/src/lxml/iterparse.pxi ============================================================================== --- lxml/trunk/src/lxml/iterparse.pxi (original) +++ lxml/trunk/src/lxml/iterparse.pxi Wed Dec 29 19:59:43 2010 @@ -360,6 +360,8 @@ cdef object _buffer cdef int (*_parse_chunk)(xmlparser.xmlParserCtxt* ctxt, char* chunk, int size, int terminate) nogil + cdef bint _close_source_file + def __init__(self, source, events=(u"end",), *, tag=None, attribute_defaults=False, dtd_validation=False, load_dtd=False, no_network=True, remove_blank_text=False, @@ -374,8 +376,10 @@ if not python.IS_PYTHON3: source = filename source = open(source, u'rb') + self._close_source_file = True else: filename = _encodeFilename(_getFilenameForFile(source)) + self._close_source_file = False self._source = source if html: @@ -449,6 +453,12 @@ context._setEventFilter(self._events, self._tag) return context + cdef _close_source(self): + source = self._source + self._source = None + if self._close_source_file and source is not None: + source.close() + def copy(self): raise TypeError, u"iterparse parsers cannot be copied" @@ -482,7 +492,7 @@ if c_stream is NULL: data = self._source.read(__ITERPARSE_CHUNK_SIZE) if not python.PyBytes_Check(data): - self._source = None + self._close_source() raise TypeError, u"reading file objects must return plain strings" c_data_len = python.PyBytes_GET_SIZE(data) c_data = _cstr(data) @@ -511,13 +521,13 @@ if not error and context._validator is not None: error = not context._validator.isvalid() if error: - self._source = None + self._close_source() del context._events[:] context._assureDocGetsFreed() _raiseParseError(pctxt, self._filename, context._error_log) if python.PyList_GET_SIZE(context._events) == 0: self.root = context._root - self._source = None + self._close_source() raise StopIteration From scoder at codespeak.net Wed Dec 29 19:59:47 2010 From: scoder at codespeak.net (scoder at codespeak.net) Date: Wed, 29 Dec 2010 19:59:47 +0100 (CET) Subject: [Lxml-checkins] r80145 - lxml/trunk Message-ID: <20101229185947.9997A282BF0@codespeak.net> Author: scoder Date: Wed Dec 29 19:59:46 2010 New Revision: 80145 Modified: lxml/trunk/INSTALL.txt Log: docs Modified: lxml/trunk/INSTALL.txt ============================================================================== --- lxml/trunk/INSTALL.txt (original) +++ lxml/trunk/INSTALL.txt Wed Dec 29 19:59:46 2010 @@ -26,7 +26,7 @@ * libxml 2.6.21 or later. It can be found here: http://xmlsoft.org/downloads.html - * We recommend libxml2 2.7.{2,3,7} or a later version. + * We recommend libxml2 2.7.8 or a later version. * If you want to use XPath, do not use libxml2 2.6.27. From scoder at codespeak.net Wed Dec 29 19:59:51 2010 From: scoder at codespeak.net (scoder at codespeak.net) Date: Wed, 29 Dec 2010 19:59:51 +0100 (CET) Subject: [Lxml-checkins] r80146 - in lxml/trunk: . src/lxml Message-ID: <20101229185951.3F841282BF0@codespeak.net> Author: scoder Date: Wed Dec 29 19:59:49 2010 New Revision: 80146 Modified: lxml/trunk/CHANGES.txt lxml/trunk/src/lxml/iterparse.pxi lxml/trunk/src/lxml/parser.pxi Log: immediately close files after parsing from them Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Wed Dec 29 19:59:49 2010 @@ -14,8 +14,11 @@ Bugs fixed ---------- -* When finished parsing, ``iterparse()`` immediately closes files that - it has opened itself. +* When finished reading from a file-like object, the parser + immediately calls its ``.close()`` method. + +* When finished parsing, ``iterparse()`` immediately closes the input + file if it has opened it itself. * Work-around for libxml2 bug that can leave the HTML parser in a non-functional state after parsing a severly broken document. Modified: lxml/trunk/src/lxml/iterparse.pxi ============================================================================== --- lxml/trunk/src/lxml/iterparse.pxi (original) +++ lxml/trunk/src/lxml/iterparse.pxi Wed Dec 29 19:59:49 2010 @@ -454,10 +454,19 @@ return context cdef _close_source(self): - source = self._source - self._source = None - if self._close_source_file and source is not None: - source.close() + if self._source is None: + return + if not self._close_source_file: + self._source = None + return + try: + close = self._source.close + except AttributeError: + close = None + finally: + self._source = None + if close is not None: + close() def copy(self): raise TypeError, u"iterparse parsers cannot be copied" Modified: lxml/trunk/src/lxml/parser.pxi ============================================================================== --- lxml/trunk/src/lxml/parser.pxi (original) +++ lxml/trunk/src/lxml/parser.pxi Wed Dec 29 19:59:49 2010 @@ -265,6 +265,7 @@ cdef _ExceptionContext _exc_context cdef Py_ssize_t _bytes_read cdef char* _c_url + def __cinit__(self, filelike, exc_context, url, encoding): self._exc_context = exc_context self._filelike = filelike @@ -278,6 +279,18 @@ self._bytes = b'' self._bytes_read = 0 + cdef _close_file(self): + if self._filelike is None: + return + try: + close = self._filelike.close + except AttributeError: + close = None + finally: + self._filelike = None + if close is not None: + close() + cdef xmlparser.xmlParserInputBuffer* _createParserInputBuffer(self): cdef cstd.FILE* c_stream cdef xmlparser.xmlParserInputBuffer* c_buffer @@ -337,7 +350,7 @@ result = xmlparser.xmlCtxtReadIO( ctxt, c_read_callback, NULL, c_callback_context, self._c_url, c_encoding, options) - + self._close_file() return result cdef int copyToBuffer(self, char* c_buffer, int c_requested): @@ -366,12 +379,14 @@ self._bytes = python.PyUnicode_AsEncodedString( self._bytes, _cstr(self._encoding), NULL) else: + self._close_file() raise TypeError, \ u"reading from file-like objects must return byte strings or unicode strings" remaining = python.PyBytes_GET_SIZE(self._bytes) if remaining == 0: self._bytes_read = -1 + self._close_file() return c_byte_count self._bytes_read = 0 @@ -383,6 +398,7 @@ return c_byte_count except: self._exc_context._store_raised() + self._close_file() return -1 cdef int _readFilelikeParser(void* ctxt, char* c_buffer, int c_size) with gil: From scoder at codespeak.net Wed Dec 29 19:59:54 2010 From: scoder at codespeak.net (scoder at codespeak.net) Date: Wed, 29 Dec 2010 19:59:54 +0100 (CET) Subject: [Lxml-checkins] r80147 - lxml/trunk Message-ID: <20101229185954.2A413282BEB@codespeak.net> Author: scoder Date: Wed Dec 29 19:59:52 2010 New Revision: 80147 Modified: lxml/trunk/CHANGES.txt Log: changelog Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Wed Dec 29 19:59:52 2010 @@ -21,7 +21,8 @@ file if it has opened it itself. * Work-around for libxml2 bug that can leave the HTML parser in a - non-functional state after parsing a severly broken document. + non-functional state after parsing a severly broken document (fixed + in libxml2 2.7.8). * ``marque`` tag in HTML cleanup code is correctly named ``marquee``. From scoder at codespeak.net Wed Dec 29 19:59:57 2010 From: scoder at codespeak.net (scoder at codespeak.net) Date: Wed, 29 Dec 2010 19:59:57 +0100 (CET) Subject: [Lxml-checkins] r80148 - lxml/trunk/src/lxml/tests Message-ID: <20101229185957.62056282BF0@codespeak.net> Author: scoder Date: Wed Dec 29 19:59:55 2010 New Revision: 80148 Modified: lxml/trunk/src/lxml/tests/common_imports.py lxml/trunk/src/lxml/tests/test_htmlparser.py lxml/trunk/src/lxml/tests/test_io.py Log: fix test resource leaks Modified: lxml/trunk/src/lxml/tests/common_imports.py ============================================================================== --- lxml/trunk/src/lxml/tests/common_imports.py (original) +++ lxml/trunk/src/lxml/tests/common_imports.py Wed Dec 29 19:59:55 2010 @@ -226,6 +226,13 @@ f.close() return data +def write_to_file(name, data, mode='w'): + f = open(name, mode) + try: + data = f.write(data) + finally: + f.close() + def readFileInTestDir(name, mode='r'): return read_file(fileInTestDir(name), mode) Modified: lxml/trunk/src/lxml/tests/test_htmlparser.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_htmlparser.py (original) +++ lxml/trunk/src/lxml/tests/test_htmlparser.py Wed Dec 29 19:59:55 2010 @@ -12,7 +12,7 @@ sys.path.insert(0, this_dir) # needed for Py3 from common_imports import etree, StringIO, BytesIO, fileInTestDir, _bytes, _str -from common_imports import SillyFileLike, HelperTestCase +from common_imports import SillyFileLike, HelperTestCase, write_to_file try: unicode = __builtins__["unicode"] @@ -211,7 +211,7 @@ def test_module_parse_html(self): parser = self.etree.HTMLParser() filename = tempfile.mktemp(suffix=".html") - open(filename, 'wb').write(self.html_str) + write_to_file(filename, self.html_str, 'wb') try: f = open(filename, 'rb') tree = self.etree.parse(f, parser) Modified: lxml/trunk/src/lxml/tests/test_io.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_io.py (original) +++ lxml/trunk/src/lxml/tests/test_io.py Wed Dec 29 19:59:55 2010 @@ -13,6 +13,7 @@ from common_imports import etree, ElementTree, fileInTestDir, _str, _bytes from common_imports import SillyFileLike, LargeFileLike, HelperTestCase +from common_imports import read_file, write_to_file class IOTestCaseBase(HelperTestCase): """(c)ElementTree compatibility for IO functions/methods @@ -100,7 +101,7 @@ handle, filename = tempfile.mkstemp(suffix=".xml") self.tree.write(filename) try: - self.assertEqual(open(filename, 'rb').read().replace(_bytes('\n'), _bytes('')), + self.assertEqual(read_file(filename, 'rb').replace(_bytes('\n'), _bytes('')), self.root_str) finally: os.close(handle) @@ -140,7 +141,7 @@ # parse from filename handle, filename = tempfile.mkstemp(suffix=".xml") - open(filename, 'wb').write(self.root_str) + write_to_file(filename, self.root_str, 'wb') try: tree = self.etree.ElementTree() root = tree.parse(filename) @@ -151,7 +152,7 @@ def test_class_parse_filename_remove_previous(self): handle, filename = tempfile.mkstemp(suffix=".xml") - open(filename, 'wb').write(self.root_str) + write_to_file(filename, self.root_str, 'wb') try: tree = self.etree.ElementTree() root = tree.parse(filename) @@ -177,8 +178,8 @@ # parse from file object handle, filename = tempfile.mkstemp(suffix=".xml") - os.write(handle, self.root_str) try: + os.write(handle, self.root_str) f = open(filename, 'rb') tree = self.etree.ElementTree() root = tree.parse(f)