From scoder at codespeak.net Fri Aug 1 17:01:20 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 1 Aug 2008 17:01:20 +0200 (CEST) Subject: [Lxml-checkins] r56895 - in lxml/trunk: . src/lxml src/lxml/tests Message-ID: <20080801150120.48915169EE5@codespeak.net> Author: scoder Date: Fri Aug 1 17:01:18 2008 New Revision: 56895 Modified: lxml/trunk/ (props changed) lxml/trunk/CHANGES.txt lxml/trunk/src/lxml/lxml.etree.pyx lxml/trunk/src/lxml/serializer.pxi lxml/trunk/src/lxml/tests/test_etree.py Log: r4713 at delle: sbehnel | 2008-08-01 08:00:08 +0200 new C14N options: exclusive, with_comments Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Fri Aug 1 17:01:18 2008 @@ -8,6 +8,8 @@ Features added -------------- +* New options for exclusive C14N and C14N without comments. + * Instantiating a custom Element classes creates a new Element. Bugs fixed Modified: lxml/trunk/src/lxml/lxml.etree.pyx ============================================================================== --- lxml/trunk/src/lxml/lxml.etree.pyx (original) +++ lxml/trunk/src/lxml/lxml.etree.pyx Fri Aug 1 17:01:18 2008 @@ -1806,13 +1806,13 @@ self._assertHasRoot() XInclude()(self._context_node) - def write_c14n(self, file): - u"""write_c14n(self, file) + def write_c14n(self, file, *, exclusive=False, with_comments=True): + u"""write_c14n(self, file, exclusive=False, with_comments=True) C14N write of document. Always writes UTF-8. 
""" self._assertHasRoot() - _tofilelikeC14N(file, self._context_node) + _tofilelikeC14N(file, self._context_node, exclusive, with_comments) cdef _ElementTree _elementTreeFactory(_Document doc, _Element context_node): return _newElementTree(doc, context_node, _ElementTree) Modified: lxml/trunk/src/lxml/serializer.pxi ============================================================================== --- lxml/trunk/src/lxml/serializer.pxi (original) +++ lxml/trunk/src/lxml/serializer.pxi Fri Aug 1 17:01:18 2008 @@ -366,7 +366,7 @@ else: writer._exc_context._raise_if_stored() -cdef _tofilelikeC14N(f, _Element element): +cdef _tofilelikeC14N(f, _Element element, bint exclusive, bint with_comments): cdef _FilelikeWriter writer cdef tree.xmlOutputBuffer* c_buffer cdef char* c_filename @@ -381,13 +381,14 @@ filename8 = _encodeFilename(f) c_filename = _cstr(filename8) with nogil: - bytes = c14n.xmlC14NDocSave(c_doc, NULL, 0, NULL, 1, - c_filename, 0) + bytes = c14n.xmlC14NDocSave(c_doc, NULL, exclusive, NULL, + with_comments, c_filename, 0) elif hasattr(f, u'write'): writer = _FilelikeWriter(f) c_buffer = writer._createOutputBuffer(NULL) writer.error_log.connect() - bytes = c14n.xmlC14NDocSaveTo(c_doc, NULL, 0, NULL, 1, c_buffer) + bytes = c14n.xmlC14NDocSaveTo(c_doc, NULL, exclusive, NULL, + with_comments, c_buffer) writer.error_log.disconnect() tree.xmlOutputBufferClose(c_buffer) else: Modified: lxml/trunk/src/lxml/tests/test_etree.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_etree.py (original) +++ lxml/trunk/src/lxml/tests/test_etree.py Fri Aug 1 17:01:18 2008 @@ -7,7 +7,7 @@ test_elementtree """ -import os.path, unittest, copy, sys, operator +import os.path, unittest, copy, sys, operator, tempfile this_dir = os.path.dirname(__file__) if this_dir not in sys.path: @@ -2393,6 +2393,57 @@ self.assertEquals(_bytes(''), s) + def test_c14n_file(self): + tree = self.parse(_bytes('')) + handle, filename = 
tempfile.mkstemp() + try: + tree.write_c14n(filename) + f = open(filename, 'rb') + data = f.read() + f.close() + finally: + os.close(handle) + os.remove(filename) + self.assertEquals(_bytes(''), + data) + + def test_c14n_with_comments(self): + tree = self.parse(_bytes('')) + f = BytesIO() + tree.write_c14n(f) + s = f.getvalue() + self.assertEquals(_bytes('\n\n'), + s) + f = BytesIO() + tree.write_c14n(f, with_comments=True) + s = f.getvalue() + self.assertEquals(_bytes('\n\n'), + s) + f = BytesIO() + tree.write_c14n(f, with_comments=False) + s = f.getvalue() + self.assertEquals(_bytes(''), + s) + + def test_c14n_exclusive(self): + tree = self.parse(_bytes( + '')) + f = BytesIO() + tree.write_c14n(f) + s = f.getvalue() + self.assertEquals(_bytes(''), + s) + f = BytesIO() + tree.write_c14n(f, exclusive=False) + s = f.getvalue() + self.assertEquals(_bytes(''), + s) + f = BytesIO() + tree.write_c14n(f, exclusive=True) + s = f.getvalue() + self.assertEquals(_bytes(''), + s) + def test_suite(): suite = unittest.TestSuite() suite.addTests([unittest.makeSuite(ETreeOnlyTestCase)]) From scoder at codespeak.net Fri Aug 1 17:01:24 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 1 Aug 2008 17:01:24 +0200 (CEST) Subject: [Lxml-checkins] r56896 - lxml/trunk Message-ID: <20080801150124.48041169EEE@codespeak.net> Author: scoder Date: Fri Aug 1 17:01:22 2008 New Revision: 56896 Modified: lxml/trunk/ (props changed) lxml/trunk/CHANGES.txt Log: r4714 at delle: sbehnel | 2008-08-01 08:04:05 +0200 changelog Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Fri Aug 1 17:01:22 2008 @@ -25,6 +25,47 @@ ------------- +2.1.1 (2008-07-24) +================== + +Features added +-------------- + +Bugs fixed +---------- + +* Crash when parsing XSLT stylesheets in a thread and using them in + another. 
+ +* Encoding problem when including text with ElementInclude under + Python 3. + +Other changes +------------- + + +2.0.8 (2008-07-24) +================== + +Features added +-------------- + +* ``lxml.html.rewrite_links()`` strips links to work around documents + with whitespace in URL attributes. + +Bugs fixed +---------- + +* Crash when parsing XSLT stylesheets in a thread and using them in + another. + +* CSS selector parser dropped remaining expression after a function + with parameters. + +Other changes +------------- + + 2.1 (2008-07-09) ================ From scoder at codespeak.net Fri Aug 1 17:01:29 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 1 Aug 2008 17:01:29 +0200 (CEST) Subject: [Lxml-checkins] r56897 - in lxml/trunk: . doc Message-ID: <20080801150129.3B196169EF0@codespeak.net> Author: scoder Date: Fri Aug 1 17:01:28 2008 New Revision: 56897 Modified: lxml/trunk/ (props changed) lxml/trunk/doc/main.txt Log: r4715 at delle: sbehnel | 2008-08-01 08:22:56 +0200 doc cleanup Modified: lxml/trunk/doc/main.txt ============================================================================== --- lxml/trunk/doc/main.txt (original) +++ lxml/trunk/doc/main.txt Fri Aug 1 17:01:28 2008 @@ -147,8 +147,8 @@ source release. If you can't wait, consider trying a less recent release version first. -The latest version is `lxml 2.1`_, released 2008-07-09 -(`changes for 2.1`_). `Older versions`_ are listed below. +The latest version is `lxml 2.1.1`_, released 2008-07-24 +(`changes for 2.1.1`_). `Older versions`_ are listed below. Please take a look at the `installation instructions`_! @@ -220,15 +220,13 @@ `2.0 `_ and the `current in-development version `_. -.. _`PDF documentation`: lxmldoc-2.1.pdf +.. 
_`PDF documentation`: lxmldoc-2.1.1.pdf -* `lxml 2.1beta3`_, released 2008-06-19 (`changes for 2.1beta3`_) +* `lxml 2.1.1`_, released 2008-07-24 (`changes for 2.1.1`_) -* `lxml 2.1beta2`_, released 2008-05-02 (`changes for 2.1beta2`_) +* `lxml 2.1`_, released 2008-07-09 (`changes for 2.1`_) -* `lxml 2.1beta1`_, released 2008-04-15 (`changes for 2.1beta1`_) - -* `lxml 2.1alpha1`_, released 2008-03-27 (`changes for 2.1alpha1`_) +* `lxml 2.0.8`_, released 2008-07-24 (`changes for 2.0.8`_) * `lxml 2.0.7`_, released 2008-06-20 (`changes for 2.0.7`_) @@ -296,11 +294,9 @@ * `lxml 0.5`_, released 2005-04-08 +.. _`lxml 2.1.1`: lxml-2.1.1.tgz .. _`lxml 2.1`: lxml-2.1.tgz -.. _`lxml 2.1beta3`: lxml-2.1beta3.tgz -.. _`lxml 2.1beta2`: lxml-2.1beta2.tgz -.. _`lxml 2.1beta1`: lxml-2.1beta1.tgz -.. _`lxml 2.1alpha1`: lxml-2.1alpha1.tgz +.. _`lxml 2.0.8`: lxml-2.0.8.tgz .. _`lxml 2.0.7`: lxml-2.0.7.tgz .. _`lxml 2.0.6`: lxml-2.0.6.tgz .. _`lxml 2.0.5`: lxml-2.0.5.tgz @@ -334,11 +330,9 @@ .. _`lxml 0.5.1`: lxml-0.5.1.tgz .. _`lxml 0.5`: lxml-0.5.tgz +.. _`changes for 2.1.1`: changes-2.1.1.html .. _`changes for 2.1`: changes-2.1.html -.. _`changes for 2.1beta3`: changes-2.1beta3.html -.. _`changes for 2.1beta2`: changes-2.1beta2.html -.. _`changes for 2.1beta1`: changes-2.1beta1.html -.. _`changes for 2.1alpha1`: changes-2.1alpha1.html +.. _`changes for 2.0.8`: changes-2.0.8.html .. _`changes for 2.0.7`: changes-2.0.7.html .. _`changes for 2.0.6`: changes-2.0.6.html .. _`changes for 2.0.5`: changes-2.0.5.html From scoder at codespeak.net Fri Aug 8 08:15:02 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 8 Aug 2008 08:15:02 +0200 (CEST) Subject: [Lxml-checkins] r57091 - in lxml/trunk: . 
src/lxml Message-ID: <20080808061502.8C74C169E83@codespeak.net> Author: scoder Date: Fri Aug 8 08:14:58 2008 New Revision: 57091 Modified: lxml/trunk/ (props changed) lxml/trunk/src/lxml/xmlerror.pxi lxml/trunk/src/lxml/xslt.pxi Log: r4719 at delle: sbehnel | 2008-08-07 19:33:08 +0200 only log XSLT errors when we run an XSLT Modified: lxml/trunk/src/lxml/xmlerror.pxi ============================================================================== --- lxml/trunk/src/lxml/xmlerror.pxi (original) +++ lxml/trunk/src/lxml/xmlerror.pxi Fri Aug 8 08:14:58 2008 @@ -25,11 +25,10 @@ # divert error messages to the global error log xmlerror.xmlThrDefSetStructuredErrorFunc(NULL, _receiveError) - connectErrorLog(NULL) + xmlerror.xmlSetStructuredErrorFunc(NULL, _receiveError) cdef void connectErrorLog(void* log): xmlerror.xmlSetStructuredErrorFunc(log, _receiveError) - xslt.xsltSetGenericErrorFunc(log, _receiveXSLTError) # Logging classes @@ -323,6 +322,15 @@ self._first_error = entry python.PyList_Append(self._entries, entry) +cdef class _XSLTErrorLog(_ErrorLog): + cdef void connect(self): + _ErrorLog.connect(self) + xslt.xsltSetGenericErrorFunc(self, _receiveXSLTError) + + cdef void disconnect(self): + xslt.xsltSetGenericErrorFunc(NULL, NULL) + _ErrorLog.disconnect(self) + cdef class _DomainErrorLog(_ErrorLog): def __init__(self, domains): _ErrorLog.__init__(self) Modified: lxml/trunk/src/lxml/xslt.pxi ============================================================================== --- lxml/trunk/src/lxml/xslt.pxi (original) +++ lxml/trunk/src/lxml/xslt.pxi Fri Aug 8 08:14:58 2008 @@ -375,7 +375,7 @@ u"string://__STRING__XSLT__%d" % id(self)) c_doc.URL = tree.xmlStrdup(_cstr(doc_url_utf)) - self._error_log = _ErrorLog() + self._error_log = _XSLTErrorLog() self._xslt_resolver_context = _XSLTResolverContext() _initXSLTResolverContext(self._xslt_resolver_context, doc._parser) # keep a copy in case we need to access the stylesheet via 'document()' @@ -609,7 +609,7 @@ cdef xmlDoc* c_doc 
new_xslt = NEW_XSLT(XSLT) # without calling __init__() new_xslt._access_control = stylesheet._access_control - new_xslt._error_log = _ErrorLog() + new_xslt._error_log = _XSLTErrorLog() new_xslt._context = stylesheet._context._copy() new_xslt._xslt_resolver_context = stylesheet._xslt_resolver_context._copy() From scoder at codespeak.net Fri Aug 8 08:15:08 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 8 Aug 2008 08:15:08 +0200 (CEST) Subject: [Lxml-checkins] r57092 - in lxml/trunk: . doc Message-ID: <20080808061508.EE5D8169E8C@codespeak.net> Author: scoder Date: Fri Aug 8 08:15:07 2008 New Revision: 57092 Modified: lxml/trunk/ (props changed) lxml/trunk/doc/parsing.txt lxml/trunk/doc/tutorial.txt Log: r4720 at delle: sbehnel | 2008-08-08 08:14:49 +0200 document that a target parser can be reused if its .close() methods resets it correctly Modified: lxml/trunk/doc/parsing.txt ============================================================================== --- lxml/trunk/doc/parsing.txt (original) +++ lxml/trunk/doc/parsing.txt Fri Aug 8 08:15:07 2008 @@ -282,7 +282,8 @@ >>> parser = etree.XMLParser(target = EchoTarget()) - >>> result = etree.XML("sometext", parser) + >>> result = etree.XML("sometext", + ... parser) start element {} data u'some' comment comment @@ -293,15 +294,35 @@ >>> print(result) closed! -Note that the parser does *not* build a tree in this case. The result -of the parser run is whatever the target object returns from its -``close()`` method. If you want to return an XML tree here, you have -to create it programmatically in the target object. An example for a -parser target that builds a tree is the ``TreeBuilder``. +It is important for the ``.close()`` method to reset the parser target +to a usable state, so that you can reuse the parser as often as you +like: + +.. sourcecode:: pycon + + >>> result = etree.XML("sometext", + ... 
parser) + start element {} + data u'some' + comment comment + data u'text' + end element + close + + >>> print(result) + closed! + +Note that the parser does *not* build a tree when using a parser +target. The result of the parser run is whatever the target object +returns from its ``.close()`` method. If you want to return an XML +tree here, you have to create it programmatically in the target +object. An example for a parser target that builds a tree is the +``TreeBuilder``. >>> parser = etree.XMLParser(target = etree.TreeBuilder()) - >>> result = etree.XML("sometext", parser) + >>> result = etree.XML("sometext", + ... parser) >>> print(result.tag) element Modified: lxml/trunk/doc/tutorial.txt ============================================================================== --- lxml/trunk/doc/tutorial.txt (original) +++ lxml/trunk/doc/tutorial.txt Fri Aug 8 08:15:07 2008 @@ -919,13 +919,44 @@ >>> class ParserTarget: ... events = [] + ... close_count = 0 ... def start(self, tag, attrib): ... self.events.append(("start", tag, attrib)) ... def close(self): - ... return self.events + ... events, self.events = self.events, [] + ... self.close_count += 1 + ... return events - >>> parser = etree.XMLParser(target=ParserTarget()) + >>> parser_target = ParserTarget() + + >>> parser = etree.XMLParser(target=parser_target) + >>> events = etree.fromstring('', parser) + + >>> print parser_target.close_count + 1 + + >>> for event in events: + ... print('event: %s - tag: %s' % (event[0], event[1])) + ... for attr, value in event[2].items(): + ... print(' * %s = %s' % (attr, value)) + event: start - tag: root + * test = true + +You can reuse the parser and its target as often as you like, so you +should take care that the ``.close()`` method really resets the +target to a usable state (also in the case of an error!). + +..
sourcecode:: pycon + + >>> events = etree.fromstring('', parser) + >>> print parser_target.close_count + 2 + >>> events = etree.fromstring('', parser) + >>> print parser_target.close_count + 3 >>> events = etree.fromstring('', parser) + >>> print parser_target.close_count + 4 >>> for event in events: ... print('event: %s - tag: %s' % (event[0], event[1])) From scoder at codespeak.net Fri Aug 8 17:47:47 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 8 Aug 2008 17:47:47 +0200 (CEST) Subject: [Lxml-checkins] r57104 - in lxml/trunk: . src/lxml Message-ID: <20080808154747.52758169F0E@codespeak.net> Author: scoder Date: Fri Aug 8 17:47:45 2008 New Revision: 57104 Modified: lxml/trunk/ (props changed) lxml/trunk/src/lxml/xmlerror.pxi lxml/trunk/src/lxml/xslt.pxi Log: r4723 at delle: sbehnel | 2008-08-08 17:47:37 +0200 reverted XSLT logging patch - doesn't seem to have a positive impact Modified: lxml/trunk/src/lxml/xmlerror.pxi ============================================================================== --- lxml/trunk/src/lxml/xmlerror.pxi (original) +++ lxml/trunk/src/lxml/xmlerror.pxi Fri Aug 8 17:47:45 2008 @@ -25,10 +25,11 @@ # divert error messages to the global error log xmlerror.xmlThrDefSetStructuredErrorFunc(NULL, _receiveError) - xmlerror.xmlSetStructuredErrorFunc(NULL, _receiveError) + connectErrorLog(NULL) cdef void connectErrorLog(void* log): xmlerror.xmlSetStructuredErrorFunc(log, _receiveError) + xslt.xsltSetGenericErrorFunc(log, _receiveXSLTError) # Logging classes @@ -322,15 +323,6 @@ self._first_error = entry python.PyList_Append(self._entries, entry) -cdef class _XSLTErrorLog(_ErrorLog): - cdef void connect(self): - _ErrorLog.connect(self) - xslt.xsltSetGenericErrorFunc(self, _receiveXSLTError) - - cdef void disconnect(self): - xslt.xsltSetGenericErrorFunc(NULL, NULL) - _ErrorLog.disconnect(self) - cdef class _DomainErrorLog(_ErrorLog): def __init__(self, domains): _ErrorLog.__init__(self) Modified: 
lxml/trunk/src/lxml/xslt.pxi ============================================================================== --- lxml/trunk/src/lxml/xslt.pxi (original) +++ lxml/trunk/src/lxml/xslt.pxi Fri Aug 8 17:47:45 2008 @@ -375,7 +375,7 @@ u"string://__STRING__XSLT__%d" % id(self)) c_doc.URL = tree.xmlStrdup(_cstr(doc_url_utf)) - self._error_log = _XSLTErrorLog() + self._error_log = _ErrorLog() self._xslt_resolver_context = _XSLTResolverContext() _initXSLTResolverContext(self._xslt_resolver_context, doc._parser) # keep a copy in case we need to access the stylesheet via 'document()' @@ -609,7 +609,7 @@ cdef xmlDoc* c_doc new_xslt = NEW_XSLT(XSLT) # without calling __init__() new_xslt._access_control = stylesheet._access_control - new_xslt._error_log = _XSLTErrorLog() + new_xslt._error_log = _ErrorLog() new_xslt._context = stylesheet._context._copy() new_xslt._xslt_resolver_context = stylesheet._xslt_resolver_context._copy() From scoder at codespeak.net Fri Aug 8 18:03:50 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 8 Aug 2008 18:03:50 +0200 (CEST) Subject: [Lxml-checkins] r57105 - lxml/branch/lxml-2.1/doc Message-ID: <20080808160350.5DDB6169FD6@codespeak.net> Author: scoder Date: Fri Aug 8 18:03:49 2008 New Revision: 57105 Modified: lxml/branch/lxml-2.1/doc/main.txt Log: site update after release of 2.0.8 Modified: lxml/branch/lxml-2.1/doc/main.txt ============================================================================== --- lxml/branch/lxml-2.1/doc/main.txt (original) +++ lxml/branch/lxml-2.1/doc/main.txt Fri Aug 8 18:03:49 2008 @@ -232,6 +232,8 @@ * `lxml 2.1alpha1`_, released 2008-03-27 (`changes for 2.1alpha1`_) +* `lxml 2.0.8`_, released 2008-07-24 (`changes for 2.0.8`_) + * `lxml 2.0.7`_, released 2008-06-20 (`changes for 2.0.7`_) * `lxml 2.0.6`_, released 2008-05-31 (`changes for 2.0.6`_) @@ -304,6 +306,7 @@ .. _`lxml 2.1beta2`: lxml-2.1beta2.tgz .. _`lxml 2.1beta1`: lxml-2.1beta1.tgz .. _`lxml 2.1alpha1`: lxml-2.1alpha1.tgz +..
_`lxml 2.0.8`: lxml-2.0.8.tgz .. _`lxml 2.0.7`: lxml-2.0.7.tgz .. _`lxml 2.0.6`: lxml-2.0.6.tgz .. _`lxml 2.0.5`: lxml-2.0.5.tgz @@ -343,6 +346,7 @@ .. _`changes for 2.1beta2`: changes-2.1beta2.html .. _`changes for 2.1beta1`: changes-2.1beta1.html .. _`changes for 2.1alpha1`: changes-2.1alpha1.html +.. _`changes for 2.0.8`: changes-2.0.8.html .. _`changes for 2.0.7`: changes-2.0.7.html .. _`changes for 2.0.6`: changes-2.0.6.html .. _`changes for 2.0.5`: changes-2.0.5.html From scoder at codespeak.net Fri Aug 8 18:04:40 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 8 Aug 2008 18:04:40 +0200 (CEST) Subject: [Lxml-checkins] r57106 - in lxml/branch/lxml-2.1: . doc Message-ID: <20080808160440.ABB61169FD6@codespeak.net> Author: scoder Date: Fri Aug 8 18:04:40 2008 New Revision: 57106 Modified: lxml/branch/lxml-2.1/CHANGES.txt lxml/branch/lxml-2.1/doc/xpathxslt.txt Log: doc merge from trunk Modified: lxml/branch/lxml-2.1/CHANGES.txt ============================================================================== --- lxml/branch/lxml-2.1/CHANGES.txt (original) +++ lxml/branch/lxml-2.1/CHANGES.txt Fri Aug 8 18:04:40 2008 @@ -27,8 +27,8 @@ Features added -------------- -* Smart strings can be switched off in XPath (``smart_string`` keyword - option). +* Smart strings can be switched off in XPath (``smart_strings`` + keyword option). * ``lxml.html.rewrite_links()`` strips links to work around documents with whitespace in URL attributes. Modified: lxml/branch/lxml-2.1/doc/xpathxslt.txt ============================================================================== --- lxml/branch/lxml-2.1/doc/xpathxslt.txt (original) +++ lxml/branch/lxml-2.1/doc/xpathxslt.txt Fri Aug 8 18:04:40 2008 @@ -185,6 +185,29 @@ construct strings that do not have an origin. For them, ``getparent()`` will return None. +There are certain cases where the smart string behaviour is +undesirable. 
For example, it means that the tree will be kept alive +by the string, which may have a considerable memory impact in the case +that the string value is the only thing in the tree that is actually +of interest. For these cases, you can deactivate the parental +relationship using the keyword argument ``smart_strings``. + + >>> root = etree.XML("TEXT") + + >>> find_text = etree.XPath("//text()") + >>> text = find_text(root)[0] + >>> print(text) + TEXT + >>> print(text.getparent().text) + TEXT + + >>> find_text = etree.XPath("//text()", smart_strings=False) + >>> text = find_text(root)[0] + >>> print(text) + TEXT + >>> hasattr(text, 'getparent') + False + Generating XPath expressions ---------------------------- From scoder at codespeak.net Sat Aug 9 11:21:47 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sat, 9 Aug 2008 11:21:47 +0200 (CEST) Subject: [Lxml-checkins] r57117 - lxml/trunk Message-ID: <20080809092147.05DA5169F8F@codespeak.net> Author: scoder Date: Sat Aug 9 11:21:46 2008 New Revision: 57117 Modified: lxml/trunk/ (props changed) lxml/trunk/CHANGES.txt Log: r4725 at delle: sbehnel | 2008-08-08 17:49:13 +0200 changelog cleanup Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Sat Aug 9 11:21:46 2008 @@ -15,12 +15,6 @@ Bugs fixed ---------- -* Crash when parsing XSLT stylesheets in a thread and using them in - another. - -* Encoding problem when including text with ElementInclude under - Python 3. - Other changes ------------- From scoder at codespeak.net Sat Aug 9 11:21:52 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sat, 9 Aug 2008 11:21:52 +0200 (CEST) Subject: [Lxml-checkins] r57118 - in lxml/trunk: . 
doc Message-ID: <20080809092152.BA521169F95@codespeak.net> Author: scoder Date: Sat Aug 9 11:21:52 2008 New Revision: 57118 Modified: lxml/trunk/ (props changed) lxml/trunk/CHANGES.txt lxml/trunk/doc/xpathxslt.txt Log: r4726 at delle: sbehnel | 2008-08-08 18:02:10 +0200 doctest on XPath smart_strings option Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Sat Aug 9 11:21:52 2008 @@ -66,8 +66,8 @@ Features added -------------- -* Smart strings can be switched off in XPath (``smart_string`` keyword - option). +* Smart strings can be switched off in XPath (``smart_strings`` + keyword option). * ``lxml.html.rewrite_links()`` strips links to work around documents with whitespace in URL attributes. Modified: lxml/trunk/doc/xpathxslt.txt ============================================================================== --- lxml/trunk/doc/xpathxslt.txt (original) +++ lxml/trunk/doc/xpathxslt.txt Sat Aug 9 11:21:52 2008 @@ -185,6 +185,29 @@ construct strings that do not have an origin. For them, ``getparent()`` will return None. +There are certain cases where the smart string behaviour is +undesirable. For example, it means that the tree will be kept alive +by the string, which may have a considerable memory impact in the case +that the string value is the only thing in the tree that is actually +of interest. For these cases, you can deactivate the parental +relationship using the keyword argument ``smart_strings``. 
+ + >>> root = etree.XML("TEXT") + + >>> find_text = etree.XPath("//text()") + >>> text = find_text(root)[0] + >>> print(text) + TEXT + >>> print(text.getparent().text) + TEXT + + >>> find_text = etree.XPath("//text()", smart_strings=False) + >>> text = find_text(root)[0] + >>> print(text) + TEXT + >>> hasattr(text, 'getparent') + False + Generating XPath expressions ---------------------------- From scoder at codespeak.net Sat Aug 9 11:21:56 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sat, 9 Aug 2008 11:21:56 +0200 (CEST) Subject: [Lxml-checkins] r57119 - lxml/trunk Message-ID: <20080809092156.022DB169FA8@codespeak.net> Author: scoder Date: Sat Aug 9 11:21:56 2008 New Revision: 57119 Added: lxml/trunk/IDEAS.txt Modified: lxml/trunk/ (props changed) lxml/trunk/TODO.txt Log: r4727 at delle: sbehnel | 2008-08-09 11:17:28 +0200 new text file IDEAS.txt about things to try out Added: lxml/trunk/IDEAS.txt ============================================================================== --- (empty file) +++ lxml/trunk/IDEAS.txt Sat Aug 9 11:21:56 2008 @@ -0,0 +1,19 @@ +Things to try out when life permits +=================================== + +* generating XML using the ``with`` statement + + http://comments.gmane.org/gmane.comp.python.general/579950?set_lines=100000 + +* parse-time validation against a user provided DTD + + * currently only works for XML Schema + +* somehow integrate RelaxNG compact notation (rnc versus rng) + + * currently not supported by libxml2 (patch exists) + +* support subclassing XSLTAccessControl to provide custom per-URL + access check methods + + * maybe custom resolvers are enough, or can be combined with this? Modified: lxml/trunk/TODO.txt ============================================================================== --- lxml/trunk/TODO.txt (original) +++ lxml/trunk/TODO.txt Sat Aug 9 11:21:56 2008 @@ -46,20 +46,6 @@ attributes, not XML children -Features --------- - -* RelaxNG compact notation (rnc versus rng) support. 
Currently not supported - by libxml2 (patch exists) - - -XSLT ----- - -* Support subclassing XSLTAccessControl to provide custom per-URL - access check methods - - Maybe ----- From scoder at codespeak.net Sat Aug 9 11:29:18 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sat, 9 Aug 2008 11:29:18 +0200 (CEST) Subject: [Lxml-checkins] r57121 - lxml/branch/lxml-2.1 Message-ID: <20080809092918.6A2E9169FA8@codespeak.net> Author: scoder Date: Sat Aug 9 11:29:17 2008 New Revision: 57121 Modified: lxml/branch/lxml-2.1/CREDITS.txt Log: typo Modified: lxml/branch/lxml-2.1/CREDITS.txt ============================================================================== --- lxml/branch/lxml-2.1/CREDITS.txt (original) +++ lxml/branch/lxml-2.1/CREDITS.txt Sat Aug 9 11:29:17 2008 @@ -17,7 +17,7 @@ Holger Joukl bug reports, feedback and development on lxml.objectify -Sidnei da Sivla +Sidnei da Silva official MS Windows builds Marc-Antoine Parent From scoder at codespeak.net Sat Aug 9 11:29:44 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sat, 9 Aug 2008 11:29:44 +0200 (CEST) Subject: [Lxml-checkins] r57122 - lxml/branch/lxml-2.0 Message-ID: <20080809092944.3AB9B169FA8@codespeak.net> Author: scoder Date: Sat Aug 9 11:29:43 2008 New Revision: 57122 Modified: lxml/branch/lxml-2.0/CREDITS.txt Log: typo Modified: lxml/branch/lxml-2.0/CREDITS.txt ============================================================================== --- lxml/branch/lxml-2.0/CREDITS.txt (original) +++ lxml/branch/lxml-2.0/CREDITS.txt Sat Aug 9 11:29:43 2008 @@ -10,7 +10,7 @@ Holger Joukl - bug reports, feedback and development on lxml.objectify -Sidnei da Sivla - official MS Windows builds +Sidnei da Silva - official MS Windows builds Marc-Antoine Parent - XPath extension function help and patches From scoder at codespeak.net Sat Aug 9 11:30:05 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sat, 9 Aug 2008 11:30:05 +0200 (CEST) Subject: [Lxml-checkins] r57123 - 
lxml/trunk Message-ID: <20080809093005.33801169FA8@codespeak.net> Author: scoder Date: Sat Aug 9 11:30:04 2008 New Revision: 57123 Modified: lxml/trunk/ (props changed) lxml/trunk/CREDITS.txt Log: r4733 at delle: sbehnel | 2008-08-09 11:28:48 +0200 typo Modified: lxml/trunk/CREDITS.txt ============================================================================== --- lxml/trunk/CREDITS.txt (original) +++ lxml/trunk/CREDITS.txt Sat Aug 9 11:30:04 2008 @@ -17,7 +17,7 @@ Holger Joukl bug reports, feedback and development on lxml.objectify -Sidnei da Sivla +Sidnei da Silva official MS Windows builds Marc-Antoine Parent From scoder at codespeak.net Sat Aug 9 11:30:09 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sat, 9 Aug 2008 11:30:09 +0200 (CEST) Subject: [Lxml-checkins] r57124 - in lxml/trunk: . doc Message-ID: <20080809093009.B7270169FAA@codespeak.net> Author: scoder Date: Sat Aug 9 11:30:09 2008 New Revision: 57124 Modified: lxml/trunk/ (props changed) lxml/trunk/doc/FAQ.txt Log: r4734 at delle: sbehnel | 2008-08-09 11:30:00 +0200 docs Modified: lxml/trunk/doc/FAQ.txt ============================================================================== --- lxml/trunk/doc/FAQ.txt (original) +++ lxml/trunk/doc/FAQ.txt Sat Aug 9 11:30:09 2008 @@ -405,6 +405,19 @@ How can I contribute? --------------------- +If you find something that you would like lxml to do (or do better), +then please tell us about it on the `mailing list`_. Patches are +always appreciated, especially when accompanied by unit tests and +documentation (doctests would be great). See the ``tests`` +subdirectories in the lxml source tree (below the ``src`` directory) +and the ReST_ `text files`_ in the ``doc`` directory. + +We also have a `list of missing features`_ that we would like to +implement but didn't due to lack of time. If you find the time, +patches are very welcome. + +..
_`list of missing features`: http://codespeak.net/svn/lxml/trunk/IDEAS.txt + Besides enhancing the code, there are a lot of places where you can help the project and its user base. You can @@ -426,9 +439,6 @@ or an idea how to make it more readable and accessible while you are reading it, please send a comment to the `mailing list`_. -.. _ReST: http://docutils.sourceforge.net/rst.html -.. _`text files`: http://codespeak.net/svn/lxml/trunk/doc/ - * help with the tutorial. A tutorial is the most important starting point for new users, so it is important for us to provide an easy to understand guide into lxml. As all documentation, the tutorial is work in progress, so we @@ -440,6 +450,9 @@ you can try to write up a better description and send it to the `mailing list`_. +.. _ReST: http://docutils.sourceforge.net/rst.html +.. _`text files`: http://codespeak.net/svn/lxml/trunk/doc/ + Bugs ==== From scoder at codespeak.net Tue Aug 19 21:39:52 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Tue, 19 Aug 2008 21:39:52 +0200 (CEST) Subject: [Lxml-checkins] r57470 - lxml/trunk Message-ID: <20080819193952.2E387169FD5@codespeak.net> Author: scoder Date: Tue Aug 19 21:39:51 2008 New Revision: 57470 Modified: lxml/trunk/ (props changed) lxml/trunk/IDEAS.txt Log: r4739 at delle: sbehnel | 2008-08-15 14:52:32 +0200 future todo Modified: lxml/trunk/IDEAS.txt ============================================================================== --- lxml/trunk/IDEAS.txt (original) +++ lxml/trunk/IDEAS.txt Tue Aug 19 21:39:51 2008 @@ -1,6 +1,11 @@ Things to try out when life permits =================================== +* zlib-based parsing/serialising of compressed in-memory data + + * requires a libxml2 I/O OutputBuffer with appropriate I/O functions + that handle a zlib buffer + * generating XML using the ``with`` statement http://comments.gmane.org/gmane.comp.python.general/579950?set_lines=100000 From scoder at codespeak.net Tue Aug 19 21:39:57 2008 From: scoder at
codespeak.net (scoder at codespeak.net) Date: Tue, 19 Aug 2008 21:39:57 +0200 (CEST) Subject: [Lxml-checkins] r57471 - in lxml/trunk: . src/lxml src/lxml/tests Message-ID: <20080819193957.1A99C169FE0@codespeak.net> Author: scoder Date: Tue Aug 19 21:39:56 2008 New Revision: 57471 Modified: lxml/trunk/ (props changed) lxml/trunk/src/lxml/tests/test_xslt.py lxml/trunk/src/lxml/xslt.pxi Log: r4740 at delle: sbehnel | 2008-08-15 21:51:44 +0200 fix XSLT document(relativeURL) resolving for stylesheets parsed from strings Modified: lxml/trunk/src/lxml/tests/test_xslt.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_xslt.py (original) +++ lxml/trunk/src/lxml/tests/test_xslt.py Tue Aug 19 21:39:56 2008 @@ -862,6 +862,57 @@ self.assertEquals(root[3].get("value"), 'B') + def test_xslt_resolver_url_building(self): + assertEquals = self.assertEquals + called = {'count' : 0} + expected_url = None + class TestResolver(etree.Resolver): + def resolve(self, url, id, context): + assertEquals(url, expected_url) + called['count'] += 1 + return self.resolve_string('', context) + + stylesheet_xml = _bytes("""\ + + + + + +""") + + parser = etree.XMLParser() + parser.resolvers.add(TestResolver()) + + # test without base_url => relative path only + expected_url = 'test.xml' + xslt = etree.XSLT(etree.XML(stylesheet_xml, parser)) + + self.assertEquals(called['count'], 0) + result = xslt(etree.XML('')) + self.assertEquals(called['count'], 1) + + # now the same thing with a stylesheet base URL on the filesystem + called['count'] = 0 + expected_url = os.path.join('MY', 'BASE', 'test.xml') + xslt = etree.XSLT(etree.XML(stylesheet_xml, parser, + base_url=os.path.join('MY', 'BASE', 'FILE'))) + + self.assertEquals(called['count'], 0) + result = xslt(etree.XML('')) + self.assertEquals(called['count'], 1) + + # now the same thing with a stylesheet base URL + called['count'] = 0 + expected_url = 'http://server.com/BASE/DIR/test.xml' + 
xslt = etree.XSLT(etree.XML(stylesheet_xml, parser, + base_url='http://server.com/BASE/DIR/FILE')) + + self.assertEquals(called['count'], 0) + result = xslt(etree.XML('')) + self.assertEquals(called['count'], 1) + def test_xslt_document_parse_allow(self): access_control = etree.XSLTAccessControl(read_file=True) xslt = etree.XSLT(etree.parse(fileInTestDir("test-document.xslt")), Modified: lxml/trunk/src/lxml/xslt.pxi ============================================================================== --- lxml/trunk/src/lxml/xslt.pxi (original) +++ lxml/trunk/src/lxml/xslt.pxi Tue Aug 19 21:39:56 2008 @@ -84,14 +84,9 @@ # delegate to the Python resolvers try: resolvers = context._resolvers - if cstd.strncmp('string://', c_uri, 9) == 0: - uri = _decodeFilename(c_uri + 9) - if cstd.strncmp('string://', context._c_style_doc.URL, 9) != 0 and \ - cstd.strcmp('', context._c_style_doc.URL) != 0: - # stylesheet URL known => make the target URL absolute - uri = os_path_join(_decodeFilename(context._c_style_doc.URL), uri) - else: - uri = _decodeFilename(c_uri) + if cstd.strncmp('string://__STRING__XSLT__/', c_uri, 26) == 0: + c_uri += 26 + uri = _decodeFilename(c_uri) doc_ref = resolvers.resolve(uri, None, context) c_doc = NULL @@ -372,7 +367,7 @@ # make sure we always have a stylesheet URL if c_doc.URL is NULL: doc_url_utf = python.PyUnicode_AsASCIIString( - u"string://__STRING__XSLT__%d" % id(self)) + u"string://__STRING__XSLT__/%d.xslt" % id(self)) c_doc.URL = tree.xmlStrdup(_cstr(doc_url_utf)) self._error_log = _ErrorLog() From scoder at codespeak.net Tue Aug 19 21:40:05 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Tue, 19 Aug 2008 21:40:05 +0200 (CEST) Subject: [Lxml-checkins] r57472 - in lxml/trunk: . 
src/lxml Message-ID: <20080819194005.B49A1169FEE@codespeak.net> Author: scoder Date: Tue Aug 19 21:40:03 2008 New Revision: 57472 Modified: lxml/trunk/ (props changed) lxml/trunk/src/lxml/docloader.pxi Log: r4741 at delle: sbehnel | 2008-08-18 09:18:42 +0200 docstrings Modified: lxml/trunk/src/lxml/docloader.pxi ============================================================================== --- lxml/trunk/src/lxml/docloader.pxi (original) +++ lxml/trunk/src/lxml/docloader.pxi Tue Aug 19 21:40:03 2008 @@ -42,9 +42,9 @@ Return a parsable string as input document. - Pass data string and context as parameters. - - You can pass the source URL as 'base_url' keyword. + Pass data string and context as parameters. You can pass the + source URL or filename through the ``base_url`` keyword + argument. """ cdef _InputDocument doc_ref doc_ref = _InputDocument() @@ -59,7 +59,8 @@ Return the name of a parsable file as input document. - Pass filename and context as parameters. + Pass filename and context as parameters. You can also pass a + URL with an HTTP, FTP or file target. """ cdef _InputDocument doc_ref doc_ref = _InputDocument() @@ -72,7 +73,12 @@ Return an open file-like object as input document. - Pass open file and context as parameters. + Pass open file and context as parameters. You can pass the + base URL or filename of the file through the ``base_url`` + keyword argument. + + Note that using ``.resolve_filename()`` is more efficient, + especially in threaded environments. """ cdef _InputDocument doc_ref try: From scoder at codespeak.net Tue Aug 19 21:40:12 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Tue, 19 Aug 2008 21:40:12 +0200 (CEST) Subject: [Lxml-checkins] r57473 - in lxml/trunk: . 
src/lxml src/lxml/tests Message-ID: <20080819194012.18D38169FFE@codespeak.net> Author: scoder Date: Tue Aug 19 21:40:10 2008 New Revision: 57473 Modified: lxml/trunk/ (props changed) lxml/trunk/CHANGES.txt lxml/trunk/src/lxml/apihelpers.pxi lxml/trunk/src/lxml/lxml.etree.pyx lxml/trunk/src/lxml/tests/test_etree.py Log: r4742 at delle: sbehnel | 2008-08-19 21:35:29 +0200 try making file names of file-like objects absolute Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Tue Aug 19 21:40:10 2008 @@ -8,6 +8,11 @@ Features added -------------- +* lxml.etree now tries to find the absolute path name of files when + parsing from a file-like object. This helps custom resolvers when + resolving relative URLs, as libxml2 can prepend them with the path + of the source document. + * New options for exclusive C14N and C14N without comments. * Instantiating a custom Element classes creates a new Element. Modified: lxml/trunk/src/lxml/apihelpers.pxi ============================================================================== --- lxml/trunk/src/lxml/apihelpers.pxi (original) +++ lxml/trunk/src/lxml/apihelpers.pxi Tue Aug 19 21:40:10 2008 @@ -1381,16 +1381,19 @@ Returns None if not a file object. 
""" # file instances have a name attribute - filename = getattr3(source, u'name', None) - if filename is not None: - return filename - # gzip file instances have a filename attribute - filename = getattr3(source, u'filename', None) - if filename is not None: - return filename + if isinstance(source, file): + return os_path_abspath(source.name) # urllib2 provides a geturl() method geturl = getattr3(source, u'geturl', None) if geturl is not None: return geturl() + # gzip file instances have a filename attribute + filename = getattr3(source, u'filename', None) + if filename is not None: + return os_path_abspath(filename) + # this is mostly for backwards compatibility + filename = getattr3(source, u'name', None) + if filename is not None: + return os_path_abspath(filename) # can't determine filename return None Modified: lxml/trunk/src/lxml/lxml.etree.pyx ============================================================================== --- lxml/trunk/src/lxml/lxml.etree.pyx (original) +++ lxml/trunk/src/lxml/lxml.etree.pyx Tue Aug 19 21:40:10 2008 @@ -32,8 +32,8 @@ del __builtin__ -cdef object os_path_join -from os.path import join as os_path_join +cdef object os_path_abspath +from os.path import abspath as os_path_abspath cdef object BytesIO, StringIO try: Modified: lxml/trunk/src/lxml/tests/test_etree.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_etree.py (original) +++ lxml/trunk/src/lxml/tests/test_etree.py Tue Aug 19 21:40:10 2008 @@ -664,6 +664,29 @@ self.assertEquals( root[0].attrib, {'default': 'valueB'}) + def test_resolve_filename_dtd_relative(self): + parse = self.etree.parse + parser = self.etree.XMLParser(attribute_defaults=True) + assertEqual = self.assertEqual + test_url = _str("__nosuch.dtd") + + class MyResolver(self.etree.Resolver): + def resolve(self, url, id, context): + assertEqual(url, fileInTestDir(test_url)) + return self.resolve_filename( + fileInTestDir('test.dtd'), context) + 
+ parser.resolvers.add(MyResolver()) + + xml = _str('') % test_url + tree = parse(StringIO(xml), parser, + base_url=fileInTestDir('__test.xml')) + root = tree.getroot() + self.assertEquals( + root.attrib, {'default': 'valueA'}) + self.assertEquals( + root[0].attrib, {'default': 'valueB'}) + def test_resolve_empty(self): parse = self.etree.parse parser = self.etree.XMLParser(load_dtd=True) From scoder at codespeak.net Tue Aug 19 22:51:38 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Tue, 19 Aug 2008 22:51:38 +0200 (CEST) Subject: [Lxml-checkins] r57478 - in lxml/branch/lxml-2.1: . src/lxml src/lxml/tests Message-ID: <20080819205138.4168616A017@codespeak.net> Author: scoder Date: Tue Aug 19 22:51:34 2008 New Revision: 57478 Modified: lxml/branch/lxml-2.1/CHANGES.txt lxml/branch/lxml-2.1/src/lxml/apihelpers.pxi lxml/branch/lxml-2.1/src/lxml/lxml.etree.pyx lxml/branch/lxml-2.1/src/lxml/tests/test_etree.py Log: trunk merge: try finding the absolute path name of file-like objects Modified: lxml/branch/lxml-2.1/CHANGES.txt ============================================================================== --- lxml/branch/lxml-2.1/CHANGES.txt (original) +++ lxml/branch/lxml-2.1/CHANGES.txt Tue Aug 19 22:51:34 2008 @@ -2,6 +2,24 @@ lxml changelog ============== +Under development +================= + +Features added +-------------- + +* lxml.etree now tries to find the absolute path name of files when + parsing from a file-like object. This helps custom resolvers when + resolving relative URLs, as libxml2 can prepend them with the path + of the source document. 
+ +Bugs fixed +---------- + +Other changes +------------- + + 2.1.1 (2008-07-24) ================== Modified: lxml/branch/lxml-2.1/src/lxml/apihelpers.pxi ============================================================================== --- lxml/branch/lxml-2.1/src/lxml/apihelpers.pxi (original) +++ lxml/branch/lxml-2.1/src/lxml/apihelpers.pxi Tue Aug 19 22:51:34 2008 @@ -1351,16 +1351,19 @@ Returns None if not a file object. """ # file instances have a name attribute - filename = getattr3(source, u'name', None) - if filename is not None: - return filename - # gzip file instances have a filename attribute - filename = getattr3(source, u'filename', None) - if filename is not None: - return filename + if isinstance(source, file): + return os_path_abspath(source.name) # urllib2 provides a geturl() method geturl = getattr3(source, u'geturl', None) if geturl is not None: return geturl() + # gzip file instances have a filename attribute + filename = getattr3(source, u'filename', None) + if filename is not None: + return os_path_abspath(filename) + # this is mostly for backwards compatibility + filename = getattr3(source, u'name', None) + if filename is not None: + return os_path_abspath(filename) # can't determine filename return None Modified: lxml/branch/lxml-2.1/src/lxml/lxml.etree.pyx ============================================================================== --- lxml/branch/lxml-2.1/src/lxml/lxml.etree.pyx (original) +++ lxml/branch/lxml-2.1/src/lxml/lxml.etree.pyx Tue Aug 19 22:51:34 2008 @@ -32,6 +32,8 @@ del __builtin__ +cdef object os_path_abspath +from os.path import abspath as os_path_abspath cdef object os_path_join from os.path import join as os_path_join Modified: lxml/branch/lxml-2.1/src/lxml/tests/test_etree.py ============================================================================== --- lxml/branch/lxml-2.1/src/lxml/tests/test_etree.py (original) +++ lxml/branch/lxml-2.1/src/lxml/tests/test_etree.py Tue Aug 19 22:51:34 2008 @@ -664,6 +664,29 @@ 
self.assertEquals( root[0].attrib, {'default': 'valueB'}) + def test_resolve_filename_dtd_relative(self): + parse = self.etree.parse + parser = self.etree.XMLParser(attribute_defaults=True) + assertEqual = self.assertEqual + test_url = _str("__nosuch.dtd") + + class MyResolver(self.etree.Resolver): + def resolve(self, url, id, context): + assertEqual(url, fileInTestDir(test_url)) + return self.resolve_filename( + fileInTestDir('test.dtd'), context) + + parser.resolvers.add(MyResolver()) + + xml = _str('') % test_url + tree = parse(StringIO(xml), parser, + base_url=fileInTestDir('__test.xml')) + root = tree.getroot() + self.assertEquals( + root.attrib, {'default': 'valueA'}) + self.assertEquals( + root[0].attrib, {'default': 'valueB'}) + def test_resolve_empty(self): parse = self.etree.parse parser = self.etree.XMLParser(load_dtd=True) From jholg at codespeak.net Thu Aug 21 08:21:49 2008 From: jholg at codespeak.net (jholg at codespeak.net) Date: Thu, 21 Aug 2008 08:21:49 +0200 (CEST) Subject: [Lxml-checkins] r57527 - in lxml/trunk/src/lxml: . tests Message-ID: <20080821062149.9299C16A148@codespeak.net> Author: jholg Date: Thu Aug 21 08:21:48 2008 New Revision: 57527 Modified: lxml/trunk/src/lxml/objectpath.pxi lxml/trunk/src/lxml/tests/test_objectify.py Log: ObjectPath(...)(root, default) now returns default even if root element does not match for absolute paths. 
Modified: lxml/trunk/src/lxml/objectpath.pxi ============================================================================== --- lxml/trunk/src/lxml/objectpath.pxi (original) +++ lxml/trunk/src/lxml/objectpath.pxi Thu Aug 21 08:21:48 2008 @@ -199,9 +199,12 @@ if c_href is NULL or c_href[0] == c'\0': c_href = tree._getNs(c_node) if not cetree.tagMatches(c_node, c_href, c_name): - raise ValueError, \ - u"root element does not match: need %s, got %s" % \ - (cetree.namespacedNameFromNsName(c_href, c_name), root.tag) + if use_default: + return default_value + else: + raise ValueError, \ + u"root element does not match: need %s, got %s" % \ + (cetree.namespacedNameFromNsName(c_href, c_name), root.tag) while c_node is not NULL: c_path_len = c_path_len - 1 Modified: lxml/trunk/src/lxml/tests/test_objectify.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_objectify.py (original) +++ lxml/trunk/src/lxml/tests/test_objectify.py Thu Aug 21 08:21:48 2008 @@ -1942,6 +1942,21 @@ root = self.XML(xml_str) path = objectify.ObjectPath( "root.c1.c99" ) self.assertRaises(AttributeError, path, root) + + def test_object_path_default_absolute(self): + root = self.XML(xml_str) + path = objectify.ObjectPath( "root.c1.c99" ) + self.assertEquals(None, path(root, None)) + path = objectify.ObjectPath( "root.c99.c2" ) + self.assertEquals(None, path(root, None)) + path = objectify.ObjectPath( "notroot.c99.c2" ) + self.assertEquals(None, path(root, None)) + + def test_object_path_default_relative(self): + root = self.XML(xml_str) + path = objectify.ObjectPath( ".c1.c99" ) + self.assertEquals(None, path(root, None)) + path = objectify.ObjectPath( ".c99.c2" ) self.assertEquals(None, path(root, None)) def test_object_path_syntax(self): @@ -1952,6 +1967,12 @@ path = objectify.ObjectPath(" root.{objectified} c1.c2 [ 0 ] ") self.assertEquals(root.c1.c2.text, path(root).text) + def test_object_path_fail_parse_empty(self): + 
self.assertRaises(ValueError, objectify.ObjectPath, "") + + def test_object_path_fail_parse_empty_list(self): + self.assertRaises(ValueError, objectify.ObjectPath, []) + def test_object_path_hasattr(self): root = self.XML(xml_str) path = objectify.ObjectPath( "root" ) From scoder at codespeak.net Sat Aug 23 14:03:55 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sat, 23 Aug 2008 14:03:55 +0200 (CEST) Subject: [Lxml-checkins] r57597 - in lxml/trunk: . src/lxml Message-ID: <20080823120355.BCD6316A247@codespeak.net> Author: scoder Date: Sat Aug 23 14:03:51 2008 New Revision: 57597 Modified: lxml/trunk/ (props changed) lxml/trunk/src/lxml/lxml.etree.pyx Log: r4747 at delle: sbehnel | 2008-08-23 13:54:34 +0200 doc fix Modified: lxml/trunk/src/lxml/lxml.etree.pyx ============================================================================== --- lxml/trunk/src/lxml/lxml.etree.pyx (original) +++ lxml/trunk/src/lxml/lxml.etree.pyx Sat Aug 23 14:03:51 2008 @@ -1542,7 +1542,8 @@ property docinfo: u"""Information about the document provided by parser and DTD. This value is only defined for ElementTree objects based on the root node - of a parsed document (e.g. those returned by the parse functions). + of a parsed document (e.g. those returned by the parse functions), + not for trees that were built manually. """ def __get__(self): self._assertHasRoot() From scoder at codespeak.net Sat Aug 23 14:04:06 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sat, 23 Aug 2008 14:04:06 +0200 (CEST) Subject: [Lxml-checkins] r57598 - in lxml/trunk: . 
src/lxml Message-ID: <20080823120406.6941016A24A@codespeak.net> Author: scoder Date: Sat Aug 23 14:04:04 2008 New Revision: 57598 Modified: lxml/trunk/ (props changed) lxml/trunk/src/lxml/apihelpers.pxi Log: r4748 at delle: sbehnel | 2008-08-23 14:03:44 +0200 Py3k fix Modified: lxml/trunk/src/lxml/apihelpers.pxi ============================================================================== --- lxml/trunk/src/lxml/apihelpers.pxi (original) +++ lxml/trunk/src/lxml/apihelpers.pxi Sat Aug 23 14:04:04 2008 @@ -1381,19 +1381,16 @@ Returns None if not a file object. """ # file instances have a name attribute - if isinstance(source, file): - return os_path_abspath(source.name) + filename = getattr3(source, u'name', None) + if filename is not None: + return os_path_abspath(filename) # urllib2 provides a geturl() method geturl = getattr3(source, u'geturl', None) if geturl is not None: return geturl() - # gzip file instances have a filename attribute + # gzip file instances have a filename attribute (before Py3k) filename = getattr3(source, u'filename', None) if filename is not None: return os_path_abspath(filename) - # this is mostly for backwards compatibility - filename = getattr3(source, u'name', None) - if filename is not None: - return os_path_abspath(filename) # can't determine filename return None