From scoder at codespeak.net Thu Feb 3 13:08:10 2011 From: scoder at codespeak.net (scoder at codespeak.net) Date: Thu, 3 Feb 2011 13:08:10 +0100 (CET) Subject: [Lxml-checkins] r80266 - in lxml/trunk: . src/lxml Message-ID: <20110203120810.4D30B2A201A@codespeak.net> Author: scoder Date: Thu Feb 3 13:08:07 2011 New Revision: 80266 Modified: lxml/trunk/CHANGES.txt lxml/trunk/src/lxml/iterparse.pxi Log: always close input file from iterparse(), not only when we opened it Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Thu Feb 3 13:08:07 2011 @@ -18,7 +18,7 @@ immediately calls its ``.close()`` method. * When finished parsing, ``iterparse()`` immediately closes the input - file if it has opened it itself. + file. * Work-around for libxml2 bug that can leave the HTML parser in a non-functional state after parsing a severly broken document (fixed Modified: lxml/trunk/src/lxml/iterparse.pxi ============================================================================== --- lxml/trunk/src/lxml/iterparse.pxi (original) +++ lxml/trunk/src/lxml/iterparse.pxi Thu Feb 3 13:08:07 2011 @@ -360,7 +360,6 @@ cdef object _buffer cdef int (*_parse_chunk)(xmlparser.xmlParserCtxt* ctxt, char* chunk, int size, int terminate) nogil - cdef bint _close_source_file def __init__(self, source, events=(u"end",), *, tag=None, attribute_defaults=False, dtd_validation=False, @@ -376,10 +375,8 @@ if not python.IS_PYTHON3: source = filename source = open(source, u'rb') - self._close_source_file = True else: filename = _encodeFilename(_getFilenameForFile(source)) - self._close_source_file = False self._source = source if html: @@ -456,9 +453,6 @@ cdef _close_source(self): if self._source is None: return - if not self._close_source_file: - self._source = None - return try: close = self._source.close except AttributeError: From scoder at codespeak.net Thu Feb 3 13:08:13 2011 From: scoder at codespeak.net (scoder at codespeak.net) Date: Thu, 3 Feb 2011 13:08:13 +0100 (CET) Subject: [Lxml-checkins] r80267 - in lxml/trunk/src/lxml: . tests Message-ID: <20110203120813.407062A201C@codespeak.net> Author: scoder Date: Thu Feb 3 13:08:11 2011 New Revision: 80267 Modified: lxml/trunk/src/lxml/lxml.etree.pyx lxml/trunk/src/lxml/tests/test_etree.py Log: raise TypeError instead of AssertionError on el.extend([None]) Modified: lxml/trunk/src/lxml/lxml.etree.pyx ============================================================================== --- lxml/trunk/src/lxml/lxml.etree.pyx (original) +++ lxml/trunk/src/lxml/lxml.etree.pyx Thu Feb 3 13:08:11 2011 @@ -750,9 +750,11 @@ Extends the current children by the elements in the iterable. """ + cdef _Element element _assertValidNode(self) for element in elements: - assert element is not None, u"Node must not be None" + if element is None: + raise TypeError, u"Node must not be None" _assertValidNode(element) _appendChild(self, element) Modified: lxml/trunk/src/lxml/tests/test_etree.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_etree.py (original) +++ lxml/trunk/src/lxml/tests/test_etree.py Thu Feb 3 13:08:11 2011 @@ -1310,10 +1310,14 @@ self.assertRaises(TypeError, a.__setitem__, 0, 'foo') - def test_append_None(self): - # raises AssertionError in ElementTree + def test_append_error(self): Element = self.etree.Element - self.assertRaises(TypeError, Element('a').append, None) + root = Element('root') + # raises AssertionError in ElementTree + self.assertRaises(TypeError, root.append, None) + self.assertRaises(TypeError, root.extend, [None]) + self.assertRaises(TypeError, root.extend, [Element('one'), None]) + self.assertEquals('one', root[0].tag) def test_addnext(self): Element = self.etree.Element From scoder at codespeak.net Fri Feb 4 09:57:02 2011 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 4 Feb 2011 09:57:02 +0100 (CET) Subject: [Lxml-checkins] r80276 - lxml/trunk/doc Message-ID: <20110204085702.426562A2002@codespeak.net> Author: scoder Date: Fri Feb 4 09:57:00 2011 New Revision: 80276 Modified: lxml/trunk/doc/xpathxslt.txt Log: doc cleanup in xpathxslt.txt, new section about the error log in XSLT Modified: lxml/trunk/doc/xpathxslt.txt ============================================================================== --- lxml/trunk/doc/xpathxslt.txt (original) +++ lxml/trunk/doc/xpathxslt.txt Fri Feb 4 09:57:00 2011 @@ -435,19 +435,19 @@ ==== lxml.etree introduces a new class, lxml.etree.XSLT. The class can be -given an ElementTree object to construct an XSLT transformer: +given an ElementTree or Element object to construct an XSLT +transformer: .. sourcecode:: pycon - >>> f = StringIO('''\ + >>> xslt_root = etree.XML('''\ ... ... ... ... ... ''') - >>> xslt_doc = etree.parse(f) - >>> transform = etree.XSLT(xslt_doc) + >>> transform = etree.XSLT(xslt_root) You can then run the transformation on an ElementTree document by simply calling it, and this results in another ElementTree object: @@ -478,9 +478,8 @@ .. sourcecode:: pycon - >>> f = StringIO('Text') - >>> doc = etree.parse(f) - >>> result = transform(doc) + >>> root = etree.XML('Text') + >>> result = transform(root) >>> result.getroot().text 'Text' @@ -543,15 +542,14 @@ ... ... ''') >>> transform = etree.XSLT(xslt_tree) - >>> f = StringIO('Text') - >>> doc = etree.parse(f) + >>> doc_root = etree.XML('Text') The parameters are passed as keyword parameters to the transform call. First, let's try passing in a simple integer expression: .. sourcecode:: pycon - >>> result = transform(doc, a="5") + >>> result = transform(doc_root, a="5") >>> str(result) '\n5\n' @@ -559,7 +557,7 @@ .. sourcecode:: pycon - >>> result = transform(doc, a="/a/b/text()") + >>> result = transform(doc_root, a="/a/b/text()") >>> str(result) '\nText\n' @@ -567,7 +565,7 @@ .. sourcecode:: pycon - >>> result = transform(doc, a=etree.XPath("/a/b/text()")) + >>> result = transform(doc_root, a=etree.XPath("/a/b/text()")) >>> str(result) '\nText\n' @@ -575,7 +573,7 @@ .. sourcecode:: pycon - >>> result = transform(doc, a="'A'") + >>> result = transform(doc_root, a="'A'") >>> str(result) '\nA\n' @@ -588,11 +586,45 @@ >>> plain_string_value = etree.XSLT.strparam( ... """ It's "Monty Python" """) - >>> result = transform(doc, a=plain_string_value) + >>> result = transform(doc_root, a=plain_string_value) >>> str(result) '\n It\'s "Monty Python" \n' +Errors and messages +------------------- + +Like most of the processing oriented objects in lxml.etree, ``XSLT`` +provides an error log that lists messages and error output from the +last run. + +.. sourcecode:: pycon + + >>> xslt_root = etree.XML('''\ + ... + ... + ... STARTING + ... + ... DONE + ... + ... ''') + >>> transform = etree.XSLT(xslt_root) + + >>> doc_root = etree.XML('Text') + >>> result = transform(doc_root) + >>> str(result) + '\nText\n' + + >>> transform.error_log + :0:0:ERROR:XSLT:ERR_OK: STARTING + :0:0:ERROR:XSLT:ERR_OK: DONE + +Note that there is no way to distinguish between user messages, +warnings and error messages that occurred during the run. ``libxslt`` +simply does not provide this information. + + The ``xslt()`` tree method -------------------------- From scoder at codespeak.net Fri Feb 4 09:57:07 2011 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 4 Feb 2011 09:57:07 +0100 (CET) Subject: [Lxml-checkins] r80277 - lxml/trunk/src/lxml Message-ID: <20110204085707.6657C2A2005@codespeak.net> Author: scoder Date: Fri Feb 4 09:57:04 2011 New Revision: 80277 Modified: lxml/trunk/src/lxml/xmlerror.pxi Log: docstrings for error log entry Modified: lxml/trunk/src/lxml/xmlerror.pxi ============================================================================== --- lxml/trunk/src/lxml/xmlerror.pxi (original) +++ lxml/trunk/src/lxml/xmlerror.pxi Fri Feb 4 09:57:04 2011 @@ -39,6 +39,18 @@ # Logging classes cdef class _LogEntry: + """A log message entry from an error log. + + Attributes: + + - message: the message text + - domain: the domain ID (see lxml.etree.ErrorDomains) + - type: the message type ID (see lxml.etree.ErrorTypes) + - level: the log level ID (see lxml.etree.ErrorLevels) + - line: the line at which the message originated (if applicable) + - column: the character column at which the message originated (if applicable) + - filename: the name of the file in which the message originated (if applicable) + """ cdef readonly object domain cdef readonly object type cdef readonly object level @@ -87,10 +99,14 @@ self.domain_name, self.type_name, self.message) property domain_name: + """The name of the error domain. See lxml.etree.ErrorDomains + """ def __get__(self): return ErrorDomains._getName(self.domain, u"unknown") property type_name: + """The name of the error type. See lxml.etree.ErrorTypes + """ def __get__(self): if self.domain == ErrorDomains.RELAXNGV: getName = RelaxNGErrorTypes._getName @@ -99,6 +115,8 @@ return getName(self.type, u"unknown") property level_name: + """The name of the error level. See lxml.etree.ErrorLevels + """ def __get__(self): return ErrorLevels._getName(self.level, u"unknown") From scoder at codespeak.net Fri Feb 4 09:57:09 2011 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 4 Feb 2011 09:57:09 +0100 (CET) Subject: [Lxml-checkins] r80278 - lxml/trunk/src/lxml Message-ID: <20110204085709.829DA2A2006@codespeak.net> Author: scoder Date: Fri Feb 4 09:57:07 2011 New Revision: 80278 Modified: lxml/trunk/src/lxml/xmlerror.pxi Log: code simplification for error log entry type Modified: lxml/trunk/src/lxml/xmlerror.pxi ============================================================================== --- lxml/trunk/src/lxml/xmlerror.pxi (original) +++ lxml/trunk/src/lxml/xmlerror.pxi Fri Feb 4 09:57:07 2011 @@ -51,11 +51,11 @@ - column: the character column at which the message originated (if applicable) - filename: the name of the file in which the message originated (if applicable) """ - cdef readonly object domain - cdef readonly object type - cdef readonly object level - cdef readonly object line - cdef readonly object column + cdef readonly int domain + cdef readonly int type + cdef readonly int level + cdef readonly int line + cdef readonly int column cdef readonly object message cdef readonly object filename From scoder at codespeak.net Sat Feb 5 12:07:45 2011 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sat, 5 Feb 2011 12:07:45 +0100 (CET) Subject: [Lxml-checkins] r80288 - lxml/trunk/doc Message-ID: <20110205110745.E36C62A2002@codespeak.net> Author: scoder Date: Sat Feb 5 12:07:43 2011 New Revision: 80288 Modified: lxml/trunk/doc/parsing.txt lxml/trunk/doc/xpathxslt.txt Log: extended documentation about the error log Modified: lxml/trunk/doc/parsing.txt ============================================================================== --- lxml/trunk/doc/parsing.txt (original) +++ lxml/trunk/doc/parsing.txt Sat Feb 5 12:07:43 2011 @@ -172,8 +172,8 @@ Error log --------- -Parsers have an ``error_log`` property that lists the errors of the -last parser run: +Parsers have an ``error_log`` property that lists the errors and +warnings of the last parser run: .. sourcecode:: pycon @@ -197,6 +197,27 @@ >>> print(error.column) 11 +Each entry in the log has the following properties: + +* ``message``: the message text +* ``domain``: the domain ID (see the lxml.etree.ErrorDomains class) +* ``type``: the message type ID (see the lxml.etree.ErrorTypes class) +* ``level``: the log level ID (see the lxml.etree.ErrorLevels class) +* ``line``: the line at which the message originated (if applicable) +* ``column``: the character column at which the message originated (if applicable) +* ``filename``: the name of the file in which the message originated (if applicable) + +For convenience, there are also three properties that provide readable +names for the ID values: + +* ``domain_name`` +* ``type_name`` +* ``level_name`` + +To filter for a specific kind of message, use the different +``filter_*()`` methods on the error log (see the +lxml.etree._ListErrorLog class). + Parsing HTML ------------ Modified: lxml/trunk/doc/xpathxslt.txt ============================================================================== --- lxml/trunk/doc/xpathxslt.txt (original) +++ lxml/trunk/doc/xpathxslt.txt Sat Feb 5 12:07:43 2011 @@ -596,7 +596,10 @@ Like most of the processing oriented objects in lxml.etree, ``XSLT`` provides an error log that lists messages and error output from the -last run. +last run. See the `parser documentation`_ for a description of the +error log. + +.. _`parser documentation`: parsing.html#error-log .. sourcecode:: pycon @@ -620,9 +623,29 @@ :0:0:ERROR:XSLT:ERR_OK: STARTING :0:0:ERROR:XSLT:ERR_OK: DONE -Note that there is no way to distinguish between user messages, -warnings and error messages that occurred during the run. ``libxslt`` -simply does not provide this information. + >>> for entry in transform.error_log: + ... print('message from line %s, col %s: %s' % ( + ... entry.line, entry.column, entry.message)) + ... print('domain: %s (%d)' % (entry.domain_name, entry.domain)) + ... print('type: %s (%d)' % (entry.type_name, entry.type)) + ... print('level: %s (%d)' % (entry.level_name, entry.level)) + ... print('filename: %s' % entry.filename) + message from line 0, col 0: STARTING + domain: XSLT (22) + type: ERR_OK (0) + level: ERROR (2) + filename: + message from line 0, col 0: DONE + domain: XSLT (22) + type: ERR_OK (0) + level: ERROR (2) + filename: + +Note that there is no way in XSLT to distinguish between user +messages, warnings and error messages that occurred during the +run. ``libxslt`` simply does not provide this information. You can +partly work around this limitation by making your own messages +uniquely identifiable, e.g. with a common text prefix. The ``xslt()`` tree method From scoder at codespeak.net Sat Feb 5 18:41:22 2011 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sat, 5 Feb 2011 18:41:22 +0100 (CET) Subject: [Lxml-checkins] r80300 - lxml/trunk/doc Message-ID: <20110205174122.5BB8A282B9C@codespeak.net> Author: scoder Date: Sat Feb 5 18:41:20 2011 New Revision: 80300 Modified: lxml/trunk/doc/xpathxslt.txt Log: doc comment on passing non-Python names as XSLT parameters Modified: lxml/trunk/doc/xpathxslt.txt ============================================================================== --- lxml/trunk/doc/xpathxslt.txt (original) +++ lxml/trunk/doc/xpathxslt.txt Sat Feb 5 18:41:20 2011 @@ -590,6 +590,25 @@ >>> str(result) '\n It\'s "Monty Python" \n' +If you need to pass parameters that are not legal Python identifiers, +pass them inside of a dictionary: + +.. sourcecode:: pycon + + >>> transform = etree.XSLT(etree.XML('''\ + ... + ... + ... + ... + ... + ... ''')) + + >>> result = transform(doc_root, **{'non-python-identifier': '5'}) + >>> str(result) + '\n5\n' + + Errors and messages ------------------- From scoder at codespeak.net Sun Feb 6 20:09:06 2011 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sun, 6 Feb 2011 20:09:06 +0100 (CET) Subject: [Lxml-checkins] r80302 - in lxml/trunk: . doc Message-ID: <20110206190906.EAA8F36C374@codespeak.net> Author: scoder Date: Sun Feb 6 20:09:04 2011 New Revision: 80302 Modified: lxml/trunk/CHANGES.txt lxml/trunk/doc/main.txt lxml/trunk/version.txt Log: prepare release of 2.3 final Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Sun Feb 6 20:09:04 2011 @@ -2,8 +2,8 @@ lxml changelog ============== -Under development -================== +2.3 (2011-02-06) +================ Features added -------------- Modified: lxml/trunk/doc/main.txt ============================================================================== --- lxml/trunk/doc/main.txt (original) +++ lxml/trunk/doc/main.txt Sun Feb 6 20:09:04 2011 @@ -150,8 +150,8 @@ source release. If you can't wait, consider trying a less recent release version first. -The latest version is `lxml 2.3beta1`_, released 2010-09-06 -(`changes for 2.3beta1`_). `Older versions`_ are listed below. +The latest version is `lxml 2.3`_, released 2011-02-06 +(`changes for 2.3`_). `Older versions`_ are listed below. Please take a look at the `installation instructions`_! @@ -222,13 +222,16 @@ See the web sites of lxml `1.3 `_, `2.0 `_, `2.1 -`_ and the `current stable version +`_, `2.2 +`_ and the `current stable version `_ .. `current in-development version `_. .. _`PDF documentation`: lxmldoc-2.3beta1.pdf +* `lxml 2.3beta1`_, released 2010-09-06 (`changes for 2.3beta1`_) + * `lxml 2.3alpha2`_, released 2010-07-24 (`changes for 2.3alpha2`_) * `lxml 2.3alpha1`_, released 2010-06-19 (`changes for 2.3alpha1`_) @@ -337,6 +340,7 @@ * `lxml 0.5`_, released 2005-04-08 +.. _`lxml 2.3`: lxml-2.3.tgz .. _`lxml 2.3beta1`: lxml-2.3beta1.tgz .. _`lxml 2.3alpha2`: lxml-2.3alpha2.tgz .. _`lxml 2.3alpha1`: lxml-2.3alpha1.tgz @@ -392,6 +396,7 @@ .. _`lxml 0.5.1`: lxml-0.5.1.tgz .. _`lxml 0.5`: lxml-0.5.tgz +.. _`changes for 2.3`: changes-2.3.html .. _`changes for 2.3beta1`: changes-2.3beta1.html .. _`changes for 2.3alpha2`: changes-2.3alpha2.html .. _`changes for 2.3alpha1`: changes-2.3alpha1.html Modified: lxml/trunk/version.txt ============================================================================== --- lxml/trunk/version.txt (original) +++ lxml/trunk/version.txt Sun Feb 6 20:09:04 2011 @@ -1 +1 @@ -2.3beta1 +2.3 From scoder at codespeak.net Sun Feb 6 20:24:05 2011 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sun, 6 Feb 2011 20:24:05 +0100 (CET) Subject: [Lxml-checkins] r80303 - lxml/trunk/doc Message-ID: <20110206192405.D491236C374@codespeak.net> Author: scoder Date: Sun Feb 6 20:24:04 2011 New Revision: 80303 Modified: lxml/trunk/doc/main.txt Log: fix PDF doc link Modified: lxml/trunk/doc/main.txt ============================================================================== --- lxml/trunk/doc/main.txt (original) +++ lxml/trunk/doc/main.txt Sun Feb 6 20:24:04 2011 @@ -228,7 +228,7 @@ .. `current in-development version `_. -.. _`PDF documentation`: lxmldoc-2.3beta1.pdf +.. _`PDF documentation`: lxmldoc-2.3.pdf * `lxml 2.3beta1`_, released 2010-09-06 (`changes for 2.3beta1`_) From scoder at codespeak.net Mon Feb 7 08:12:03 2011 From: scoder at codespeak.net (scoder at codespeak.net) Date: Mon, 7 Feb 2011 08:12:03 +0100 (CET) Subject: [Lxml-checkins] r80304 - lxml/tag/lxml-2.3 Message-ID: <20110207071203.71301282B8B@codespeak.net> Author: scoder Date: Mon Feb 7 08:12:00 2011 New Revision: 80304 Added: lxml/tag/lxml-2.3/ - copied from r80303, lxml/trunk/ Log: tag for lxml 2.3 From scoder at codespeak.net Mon Feb 7 10:08:52 2011 From: scoder at codespeak.net (scoder at codespeak.net) Date: Mon, 7 Feb 2011 10:08:52 +0100 (CET) Subject: [Lxml-checkins] r80305 - lxml/trunk Message-ID: <20110207090852.53D97282B9C@codespeak.net> Author: scoder Date: Mon Feb 7 10:08:50 2011 New Revision: 80305 Modified: lxml/trunk/setup.py Log: doc fix Modified: lxml/trunk/setup.py ============================================================================== --- lxml/trunk/setup.py (original) +++ lxml/trunk/setup.py Mon Feb 7 10:08:50 2011 @@ -52,7 +52,8 @@ Running ``easy_install lxml==%(branch_version)sbugfix`` will install the unreleased branch state from http://codespeak.net/svn/lxml/branch/lxml-%(branch_version)s#egg=lxml-%(branch_version)sbugfix -as soon as a maintenance branch has been established. +as soon as a maintenance branch has been established. Note that this +requires Cython to be installed for the build. """ if versioninfo.is_pre_release(): From scoder at codespeak.net Thu Feb 10 13:04:25 2011 From: scoder at codespeak.net (scoder at codespeak.net) Date: Thu, 10 Feb 2011 13:04:25 +0100 (CET) Subject: [Lxml-checkins] r80325 - lxml/trunk Message-ID: <20110210120425.14B932A202D@codespeak.net> Author: scoder Date: Thu Feb 10 13:04:23 2011 New Revision: 80325 Modified: lxml/trunk/CREDITS.txt Log: credits update: pascal Modified: lxml/trunk/CREDITS.txt ============================================================================== --- lxml/trunk/CREDITS.txt (original) +++ lxml/trunk/CREDITS.txt Thu Feb 10 13:04:23 2011 @@ -20,6 +20,9 @@ Sidnei da Silva official MS Windows builds +Pascal Obernd?rfer + official Mac-OS builds + Marc-Antoine Parent XPath extension function help and patches