From scoder at codespeak.net Fri Aug 1 17:01:20 2008
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Fri, 1 Aug 2008 17:01:20 +0200 (CEST)
Subject: [Lxml-checkins] r56895 - in lxml/trunk: . src/lxml src/lxml/tests
Message-ID: <20080801150120.48915169EE5@codespeak.net>
Author: scoder
Date: Fri Aug 1 17:01:18 2008
New Revision: 56895
Modified:
lxml/trunk/ (props changed)
lxml/trunk/CHANGES.txt
lxml/trunk/src/lxml/lxml.etree.pyx
lxml/trunk/src/lxml/serializer.pxi
lxml/trunk/src/lxml/tests/test_etree.py
Log:
r4713 at delle: sbehnel | 2008-08-01 08:00:08 +0200
new C14N options: exclusive, with_comments
Modified: lxml/trunk/CHANGES.txt
==============================================================================
--- lxml/trunk/CHANGES.txt (original)
+++ lxml/trunk/CHANGES.txt Fri Aug 1 17:01:18 2008
@@ -8,6 +8,8 @@
Features added
--------------
+* New options for exclusive C14N and C14N without comments.
+
* Instantiating a custom Element class creates a new Element.
Bugs fixed
Modified: lxml/trunk/src/lxml/lxml.etree.pyx
==============================================================================
--- lxml/trunk/src/lxml/lxml.etree.pyx (original)
+++ lxml/trunk/src/lxml/lxml.etree.pyx Fri Aug 1 17:01:18 2008
@@ -1806,13 +1806,13 @@
self._assertHasRoot()
XInclude()(self._context_node)
- def write_c14n(self, file):
- u"""write_c14n(self, file)
+ def write_c14n(self, file, *, exclusive=False, with_comments=True):
+ u"""write_c14n(self, file, exclusive=False, with_comments=True)
C14N write of document. Always writes UTF-8.
"""
self._assertHasRoot()
- _tofilelikeC14N(file, self._context_node)
+ _tofilelikeC14N(file, self._context_node, exclusive, with_comments)
cdef _ElementTree _elementTreeFactory(_Document doc, _Element context_node):
return _newElementTree(doc, context_node, _ElementTree)
Modified: lxml/trunk/src/lxml/serializer.pxi
==============================================================================
--- lxml/trunk/src/lxml/serializer.pxi (original)
+++ lxml/trunk/src/lxml/serializer.pxi Fri Aug 1 17:01:18 2008
@@ -366,7 +366,7 @@
else:
writer._exc_context._raise_if_stored()
-cdef _tofilelikeC14N(f, _Element element):
+cdef _tofilelikeC14N(f, _Element element, bint exclusive, bint with_comments):
cdef _FilelikeWriter writer
cdef tree.xmlOutputBuffer* c_buffer
cdef char* c_filename
@@ -381,13 +381,14 @@
filename8 = _encodeFilename(f)
c_filename = _cstr(filename8)
with nogil:
- bytes = c14n.xmlC14NDocSave(c_doc, NULL, 0, NULL, 1,
- c_filename, 0)
+ bytes = c14n.xmlC14NDocSave(c_doc, NULL, exclusive, NULL,
+ with_comments, c_filename, 0)
elif hasattr(f, u'write'):
writer = _FilelikeWriter(f)
c_buffer = writer._createOutputBuffer(NULL)
writer.error_log.connect()
- bytes = c14n.xmlC14NDocSaveTo(c_doc, NULL, 0, NULL, 1, c_buffer)
+ bytes = c14n.xmlC14NDocSaveTo(c_doc, NULL, exclusive, NULL,
+ with_comments, c_buffer)
writer.error_log.disconnect()
tree.xmlOutputBufferClose(c_buffer)
else:
Modified: lxml/trunk/src/lxml/tests/test_etree.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_etree.py (original)
+++ lxml/trunk/src/lxml/tests/test_etree.py Fri Aug 1 17:01:18 2008
@@ -7,7 +7,7 @@
test_elementtree
"""
-import os.path, unittest, copy, sys, operator
+import os.path, unittest, copy, sys, operator, tempfile
this_dir = os.path.dirname(__file__)
if this_dir not in sys.path:
@@ -2393,6 +2393,57 @@
self.assertEquals(_bytes(''),
s)
+ def test_c14n_file(self):
+ tree = self.parse(_bytes(''))
+ handle, filename = tempfile.mkstemp()
+ try:
+ tree.write_c14n(filename)
+ f = open(filename, 'rb')
+ data = f.read()
+ f.close()
+ finally:
+ os.close(handle)
+ os.remove(filename)
+ self.assertEquals(_bytes(''),
+ data)
+
+ def test_c14n_with_comments(self):
+ tree = self.parse(_bytes(''))
+ f = BytesIO()
+ tree.write_c14n(f)
+ s = f.getvalue()
+ self.assertEquals(_bytes('\n\n'),
+ s)
+ f = BytesIO()
+ tree.write_c14n(f, with_comments=True)
+ s = f.getvalue()
+ self.assertEquals(_bytes('\n\n'),
+ s)
+ f = BytesIO()
+ tree.write_c14n(f, with_comments=False)
+ s = f.getvalue()
+ self.assertEquals(_bytes(''),
+ s)
+
+ def test_c14n_exclusive(self):
+ tree = self.parse(_bytes(
+ ''))
+ f = BytesIO()
+ tree.write_c14n(f)
+ s = f.getvalue()
+ self.assertEquals(_bytes(''),
+ s)
+ f = BytesIO()
+ tree.write_c14n(f, exclusive=False)
+ s = f.getvalue()
+ self.assertEquals(_bytes(''),
+ s)
+ f = BytesIO()
+ tree.write_c14n(f, exclusive=True)
+ s = f.getvalue()
+ self.assertEquals(_bytes(''),
+ s)
+
def test_suite():
suite = unittest.TestSuite()
suite.addTests([unittest.makeSuite(ETreeOnlyTestCase)])
From scoder at codespeak.net Fri Aug 1 17:01:24 2008
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Fri, 1 Aug 2008 17:01:24 +0200 (CEST)
Subject: [Lxml-checkins] r56896 - lxml/trunk
Message-ID: <20080801150124.48041169EEE@codespeak.net>
Author: scoder
Date: Fri Aug 1 17:01:22 2008
New Revision: 56896
Modified:
lxml/trunk/ (props changed)
lxml/trunk/CHANGES.txt
Log:
r4714 at delle: sbehnel | 2008-08-01 08:04:05 +0200
changelog
Modified: lxml/trunk/CHANGES.txt
==============================================================================
--- lxml/trunk/CHANGES.txt (original)
+++ lxml/trunk/CHANGES.txt Fri Aug 1 17:01:22 2008
@@ -25,6 +25,47 @@
-------------
+2.1.1 (2008-07-24)
+==================
+
+Features added
+--------------
+
+Bugs fixed
+----------
+
+* Crash when parsing XSLT stylesheets in a thread and using them in
+ another.
+
+* Encoding problem when including text with ElementInclude under
+ Python 3.
+
+Other changes
+-------------
+
+
+2.0.8 (2008-07-24)
+==================
+
+Features added
+--------------
+
+* ``lxml.html.rewrite_links()`` strips links to work around documents
+ with whitespace in URL attributes.
+
+Bugs fixed
+----------
+
+* Crash when parsing XSLT stylesheets in a thread and using them in
+ another.
+
+* CSS selector parser dropped remaining expression after a function
+ with parameters.
+
+Other changes
+-------------
+
+
2.1 (2008-07-09)
================
From scoder at codespeak.net Fri Aug 1 17:01:29 2008
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Fri, 1 Aug 2008 17:01:29 +0200 (CEST)
Subject: [Lxml-checkins] r56897 - in lxml/trunk: . doc
Message-ID: <20080801150129.3B196169EF0@codespeak.net>
Author: scoder
Date: Fri Aug 1 17:01:28 2008
New Revision: 56897
Modified:
lxml/trunk/ (props changed)
lxml/trunk/doc/main.txt
Log:
r4715 at delle: sbehnel | 2008-08-01 08:22:56 +0200
doc cleanup
Modified: lxml/trunk/doc/main.txt
==============================================================================
--- lxml/trunk/doc/main.txt (original)
+++ lxml/trunk/doc/main.txt Fri Aug 1 17:01:28 2008
@@ -147,8 +147,8 @@
source release. If you can't wait, consider trying a less recent
release version first.
-The latest version is `lxml 2.1`_, released 2008-07-09
-(`changes for 2.1`_). `Older versions`_ are listed below.
+The latest version is `lxml 2.1.1`_, released 2008-07-24
+(`changes for 2.1.1`_). `Older versions`_ are listed below.
Please take a look at the `installation instructions`_!
@@ -220,15 +220,13 @@
`2.0 `_ and the `current
in-development version `_.
-.. _`PDF documentation`: lxmldoc-2.1.pdf
+.. _`PDF documentation`: lxmldoc-2.1.1.pdf
-* `lxml 2.1beta3`_, released 2008-06-19 (`changes for 2.1beta3`_)
+* `lxml 2.1.1`_, released 2008-07-24 (`changes for 2.1.1`_)
-* `lxml 2.1beta2`_, released 2008-05-02 (`changes for 2.1beta2`_)
+* `lxml 2.1`_, released 2008-07-09 (`changes for 2.1`_)
-* `lxml 2.1beta1`_, released 2008-04-15 (`changes for 2.1beta1`_)
-
-* `lxml 2.1alpha1`_, released 2008-03-27 (`changes for 2.1alpha1`_)
+* `lxml 2.0.8`_, released 2008-07-24 (`changes for 2.0.8`_)
* `lxml 2.0.7`_, released 2008-06-20 (`changes for 2.0.7`_)
@@ -296,11 +294,9 @@
* `lxml 0.5`_, released 2005-04-08
+.. _`lxml 2.1.1`: lxml-2.1.1.tgz
.. _`lxml 2.1`: lxml-2.1.tgz
-.. _`lxml 2.1beta3`: lxml-2.1beta3.tgz
-.. _`lxml 2.1beta2`: lxml-2.1beta2.tgz
-.. _`lxml 2.1beta1`: lxml-2.1beta1.tgz
-.. _`lxml 2.1alpha1`: lxml-2.1alpha1.tgz
+.. _`lxml 2.0.8`: lxml-2.0.8.tgz
.. _`lxml 2.0.7`: lxml-2.0.7.tgz
.. _`lxml 2.0.6`: lxml-2.0.6.tgz
.. _`lxml 2.0.5`: lxml-2.0.5.tgz
@@ -334,11 +330,9 @@
.. _`lxml 0.5.1`: lxml-0.5.1.tgz
.. _`lxml 0.5`: lxml-0.5.tgz
+.. _`changes for 2.1.1`: changes-2.1.1.html
.. _`changes for 2.1`: changes-2.1.html
-.. _`changes for 2.1beta3`: changes-2.1beta3.html
-.. _`changes for 2.1beta2`: changes-2.1beta2.html
-.. _`changes for 2.1beta1`: changes-2.1beta1.html
-.. _`changes for 2.1alpha1`: changes-2.1alpha1.html
+.. _`changes for 2.0.8`: changes-2.0.8.html
.. _`changes for 2.0.7`: changes-2.0.7.html
.. _`changes for 2.0.6`: changes-2.0.6.html
.. _`changes for 2.0.5`: changes-2.0.5.html
From scoder at codespeak.net Fri Aug 8 08:15:02 2008
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Fri, 8 Aug 2008 08:15:02 +0200 (CEST)
Subject: [Lxml-checkins] r57091 - in lxml/trunk: . src/lxml
Message-ID: <20080808061502.8C74C169E83@codespeak.net>
Author: scoder
Date: Fri Aug 8 08:14:58 2008
New Revision: 57091
Modified:
lxml/trunk/ (props changed)
lxml/trunk/src/lxml/xmlerror.pxi
lxml/trunk/src/lxml/xslt.pxi
Log:
r4719 at delle: sbehnel | 2008-08-07 19:33:08 +0200
only log XSLT errors when we run an XSLT
Modified: lxml/trunk/src/lxml/xmlerror.pxi
==============================================================================
--- lxml/trunk/src/lxml/xmlerror.pxi (original)
+++ lxml/trunk/src/lxml/xmlerror.pxi Fri Aug 8 08:14:58 2008
@@ -25,11 +25,10 @@
# divert error messages to the global error log
xmlerror.xmlThrDefSetStructuredErrorFunc(NULL, _receiveError)
- connectErrorLog(NULL)
+ xmlerror.xmlSetStructuredErrorFunc(NULL, _receiveError)
cdef void connectErrorLog(void* log):
xmlerror.xmlSetStructuredErrorFunc(log, _receiveError)
- xslt.xsltSetGenericErrorFunc(log, _receiveXSLTError)
# Logging classes
@@ -323,6 +322,15 @@
self._first_error = entry
python.PyList_Append(self._entries, entry)
+cdef class _XSLTErrorLog(_ErrorLog):
+ cdef void connect(self):
+ _ErrorLog.connect(self)
+ xslt.xsltSetGenericErrorFunc(self, _receiveXSLTError)
+
+ cdef void disconnect(self):
+ xslt.xsltSetGenericErrorFunc(NULL, NULL)
+ _ErrorLog.disconnect(self)
+
cdef class _DomainErrorLog(_ErrorLog):
def __init__(self, domains):
_ErrorLog.__init__(self)
Modified: lxml/trunk/src/lxml/xslt.pxi
==============================================================================
--- lxml/trunk/src/lxml/xslt.pxi (original)
+++ lxml/trunk/src/lxml/xslt.pxi Fri Aug 8 08:14:58 2008
@@ -375,7 +375,7 @@
u"string://__STRING__XSLT__%d" % id(self))
c_doc.URL = tree.xmlStrdup(_cstr(doc_url_utf))
- self._error_log = _ErrorLog()
+ self._error_log = _XSLTErrorLog()
self._xslt_resolver_context = _XSLTResolverContext()
_initXSLTResolverContext(self._xslt_resolver_context, doc._parser)
# keep a copy in case we need to access the stylesheet via 'document()'
@@ -609,7 +609,7 @@
cdef xmlDoc* c_doc
new_xslt = NEW_XSLT(XSLT) # without calling __init__()
new_xslt._access_control = stylesheet._access_control
- new_xslt._error_log = _ErrorLog()
+ new_xslt._error_log = _XSLTErrorLog()
new_xslt._context = stylesheet._context._copy()
new_xslt._xslt_resolver_context = stylesheet._xslt_resolver_context._copy()
From scoder at codespeak.net Fri Aug 8 08:15:08 2008
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Fri, 8 Aug 2008 08:15:08 +0200 (CEST)
Subject: [Lxml-checkins] r57092 - in lxml/trunk: . doc
Message-ID: <20080808061508.EE5D8169E8C@codespeak.net>
Author: scoder
Date: Fri Aug 8 08:15:07 2008
New Revision: 57092
Modified:
lxml/trunk/ (props changed)
lxml/trunk/doc/parsing.txt
lxml/trunk/doc/tutorial.txt
Log:
r4720 at delle: sbehnel | 2008-08-08 08:14:49 +0200
document that a target parser can be reused if its .close() method resets it correctly
Modified: lxml/trunk/doc/parsing.txt
==============================================================================
--- lxml/trunk/doc/parsing.txt (original)
+++ lxml/trunk/doc/parsing.txt Fri Aug 8 08:15:07 2008
@@ -282,7 +282,8 @@
>>> parser = etree.XMLParser(target = EchoTarget())
- >>> result = etree.XML("sometext", parser)
+ >>> result = etree.XML("sometext",
+ ... parser)
start element {}
data u'some'
comment comment
@@ -293,15 +294,35 @@
>>> print(result)
closed!
-Note that the parser does *not* build a tree in this case. The result
-of the parser run is whatever the target object returns from its
-``close()`` method. If you want to return an XML tree here, you have
-to create it programmatically in the target object. An example for a
-parser target that builds a tree is the ``TreeBuilder``.
+It is important for the ``.close()`` method to reset the parser target
+to a usable state, so that you can reuse the parser as often as you
+like:
+
+.. sourcecode:: pycon
+
+ >>> result = etree.XML("sometext",
+ ... parser)
+ start element {}
+ data u'some'
+ comment comment
+ data u'text'
+ end element
+ close
+
+ >>> print(result)
+ closed!
+
+Note that the parser does *not* build a tree when using a parser
+target. The result of the parser run is whatever the target object
+returns from its ``.close()`` method. If you want to return an XML
+tree here, you have to create it programmatically in the target
+object. An example for a parser target that builds a tree is the
+``TreeBuilder``.
>>> parser = etree.XMLParser(target = etree.TreeBuilder())
- >>> result = etree.XML("sometext", parser)
+ >>> result = etree.XML("sometext",
+ ... parser)
>>> print(result.tag)
element
Modified: lxml/trunk/doc/tutorial.txt
==============================================================================
--- lxml/trunk/doc/tutorial.txt (original)
+++ lxml/trunk/doc/tutorial.txt Fri Aug 8 08:15:07 2008
@@ -919,13 +919,44 @@
>>> class ParserTarget:
... events = []
+ ... close_count = 0
... def start(self, tag, attrib):
... self.events.append(("start", tag, attrib))
... def close(self):
- ... return self.events
+ ... events, self.events = self.events, []
+ ... self.close_count += 1
+ ... return events
- >>> parser = etree.XMLParser(target=ParserTarget())
+ >>> parser_target = ParserTarget()
+
+ >>> parser = etree.XMLParser(target=parser_target)
+ >>> events = etree.fromstring('', parser)
+
+ >>> print parser_target.close_count
+ 1
+
+ >>> for event in events:
+ ... print('event: %s - tag: %s' % (event[0], event[1]))
+ ... for attr, value in event[2].items():
+ ... print(' * %s = %s' % (attr, value))
+ event: start - tag: root
+ * test = true
+
+You can reuse the parser and its target as often as you like, so you
+should take care that the ``.close()`` method really resets the
+target to a usable state (also in the case of an error!).
+
+.. sourcecode:: pycon
+
+ >>> events = etree.fromstring('', parser)
+ >>> print parser_target.close_count
+ 2
+ >>> events = etree.fromstring('', parser)
+ >>> print parser_target.close_count
+ 3
>>> events = etree.fromstring('', parser)
+ >>> print parser_target.close_count
+ 4
>>> for event in events:
... print('event: %s - tag: %s' % (event[0], event[1]))
From scoder at codespeak.net Fri Aug 8 17:47:47 2008
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Fri, 8 Aug 2008 17:47:47 +0200 (CEST)
Subject: [Lxml-checkins] r57104 - in lxml/trunk: . src/lxml
Message-ID: <20080808154747.52758169F0E@codespeak.net>
Author: scoder
Date: Fri Aug 8 17:47:45 2008
New Revision: 57104
Modified:
lxml/trunk/ (props changed)
lxml/trunk/src/lxml/xmlerror.pxi
lxml/trunk/src/lxml/xslt.pxi
Log:
r4723 at delle: sbehnel | 2008-08-08 17:47:37 +0200
reverted XSLT logging patch - doesn't seem to have a positive impact
Modified: lxml/trunk/src/lxml/xmlerror.pxi
==============================================================================
--- lxml/trunk/src/lxml/xmlerror.pxi (original)
+++ lxml/trunk/src/lxml/xmlerror.pxi Fri Aug 8 17:47:45 2008
@@ -25,10 +25,11 @@
# divert error messages to the global error log
xmlerror.xmlThrDefSetStructuredErrorFunc(NULL, _receiveError)
- xmlerror.xmlSetStructuredErrorFunc(NULL, _receiveError)
+ connectErrorLog(NULL)
cdef void connectErrorLog(void* log):
xmlerror.xmlSetStructuredErrorFunc(log, _receiveError)
+ xslt.xsltSetGenericErrorFunc(log, _receiveXSLTError)
# Logging classes
@@ -322,15 +323,6 @@
self._first_error = entry
python.PyList_Append(self._entries, entry)
-cdef class _XSLTErrorLog(_ErrorLog):
- cdef void connect(self):
- _ErrorLog.connect(self)
- xslt.xsltSetGenericErrorFunc(self, _receiveXSLTError)
-
- cdef void disconnect(self):
- xslt.xsltSetGenericErrorFunc(NULL, NULL)
- _ErrorLog.disconnect(self)
-
cdef class _DomainErrorLog(_ErrorLog):
def __init__(self, domains):
_ErrorLog.__init__(self)
Modified: lxml/trunk/src/lxml/xslt.pxi
==============================================================================
--- lxml/trunk/src/lxml/xslt.pxi (original)
+++ lxml/trunk/src/lxml/xslt.pxi Fri Aug 8 17:47:45 2008
@@ -375,7 +375,7 @@
u"string://__STRING__XSLT__%d" % id(self))
c_doc.URL = tree.xmlStrdup(_cstr(doc_url_utf))
- self._error_log = _XSLTErrorLog()
+ self._error_log = _ErrorLog()
self._xslt_resolver_context = _XSLTResolverContext()
_initXSLTResolverContext(self._xslt_resolver_context, doc._parser)
# keep a copy in case we need to access the stylesheet via 'document()'
@@ -609,7 +609,7 @@
cdef xmlDoc* c_doc
new_xslt = NEW_XSLT(XSLT) # without calling __init__()
new_xslt._access_control = stylesheet._access_control
- new_xslt._error_log = _XSLTErrorLog()
+ new_xslt._error_log = _ErrorLog()
new_xslt._context = stylesheet._context._copy()
new_xslt._xslt_resolver_context = stylesheet._xslt_resolver_context._copy()
From scoder at codespeak.net Fri Aug 8 18:03:50 2008
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Fri, 8 Aug 2008 18:03:50 +0200 (CEST)
Subject: [Lxml-checkins] r57105 - lxml/branch/lxml-2.1/doc
Message-ID: <20080808160350.5DDB6169FD6@codespeak.net>
Author: scoder
Date: Fri Aug 8 18:03:49 2008
New Revision: 57105
Modified:
lxml/branch/lxml-2.1/doc/main.txt
Log:
site update after release of 2.0.8
Modified: lxml/branch/lxml-2.1/doc/main.txt
==============================================================================
--- lxml/branch/lxml-2.1/doc/main.txt (original)
+++ lxml/branch/lxml-2.1/doc/main.txt Fri Aug 8 18:03:49 2008
@@ -232,6 +232,8 @@
* `lxml 2.1alpha1`_, released 2008-03-27 (`changes for 2.1alpha1`_)
+* `lxml 2.0.8`_, released 2008-07-24 (`changes for 2.0.8`_)
+
* `lxml 2.0.7`_, released 2008-06-20 (`changes for 2.0.7`_)
* `lxml 2.0.6`_, released 2008-05-31 (`changes for 2.0.6`_)
@@ -304,6 +306,7 @@
.. _`lxml 2.1beta2`: lxml-2.1beta2.tgz
.. _`lxml 2.1beta1`: lxml-2.1beta1.tgz
.. _`lxml 2.1alpha1`: lxml-2.1alpha1.tgz
+.. _`lxml 2.0.8`: lxml-2.0.8.tgz
.. _`lxml 2.0.7`: lxml-2.0.7.tgz
.. _`lxml 2.0.6`: lxml-2.0.6.tgz
.. _`lxml 2.0.5`: lxml-2.0.5.tgz
@@ -343,6 +346,7 @@
.. _`changes for 2.1beta2`: changes-2.1beta2.html
.. _`changes for 2.1beta1`: changes-2.1beta1.html
.. _`changes for 2.1alpha1`: changes-2.1alpha1.html
+.. _`changes for 2.0.8`: changes-2.0.8.html
.. _`changes for 2.0.7`: changes-2.0.7.html
.. _`changes for 2.0.6`: changes-2.0.6.html
.. _`changes for 2.0.5`: changes-2.0.5.html
From scoder at codespeak.net Fri Aug 8 18:04:40 2008
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Fri, 8 Aug 2008 18:04:40 +0200 (CEST)
Subject: [Lxml-checkins] r57106 - in lxml/branch/lxml-2.1: . doc
Message-ID: <20080808160440.ABB61169FD6@codespeak.net>
Author: scoder
Date: Fri Aug 8 18:04:40 2008
New Revision: 57106
Modified:
lxml/branch/lxml-2.1/CHANGES.txt
lxml/branch/lxml-2.1/doc/xpathxslt.txt
Log:
doc merge from trunk
Modified: lxml/branch/lxml-2.1/CHANGES.txt
==============================================================================
--- lxml/branch/lxml-2.1/CHANGES.txt (original)
+++ lxml/branch/lxml-2.1/CHANGES.txt Fri Aug 8 18:04:40 2008
@@ -27,8 +27,8 @@
Features added
--------------
-* Smart strings can be switched off in XPath (``smart_string`` keyword
- option).
+* Smart strings can be switched off in XPath (``smart_strings``
+ keyword option).
* ``lxml.html.rewrite_links()`` strips links to work around documents
with whitespace in URL attributes.
Modified: lxml/branch/lxml-2.1/doc/xpathxslt.txt
==============================================================================
--- lxml/branch/lxml-2.1/doc/xpathxslt.txt (original)
+++ lxml/branch/lxml-2.1/doc/xpathxslt.txt Fri Aug 8 18:04:40 2008
@@ -185,6 +185,29 @@
construct strings that do not have an origin. For them,
``getparent()`` will return None.
+There are certain cases where the smart string behaviour is
+undesirable. For example, it means that the tree will be kept alive
+by the string, which may have a considerable memory impact in the case
+that the string value is the only thing in the tree that is actually
+of interest. For these cases, you can deactivate the parental
+relationship using the keyword argument ``smart_strings``.
+
+ >>> root = etree.XML("TEXT")
+
+ >>> find_text = etree.XPath("//text()")
+ >>> text = find_text(root)[0]
+ >>> print(text)
+ TEXT
+ >>> print(text.getparent().text)
+ TEXT
+
+ >>> find_text = etree.XPath("//text()", smart_strings=False)
+ >>> text = find_text(root)[0]
+ >>> print(text)
+ TEXT
+ >>> hasattr(text, 'getparent')
+ False
+
Generating XPath expressions
----------------------------
From scoder at codespeak.net Sat Aug 9 11:21:47 2008
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Sat, 9 Aug 2008 11:21:47 +0200 (CEST)
Subject: [Lxml-checkins] r57117 - lxml/trunk
Message-ID: <20080809092147.05DA5169F8F@codespeak.net>
Author: scoder
Date: Sat Aug 9 11:21:46 2008
New Revision: 57117
Modified:
lxml/trunk/ (props changed)
lxml/trunk/CHANGES.txt
Log:
r4725 at delle: sbehnel | 2008-08-08 17:49:13 +0200
changelog cleanup
Modified: lxml/trunk/CHANGES.txt
==============================================================================
--- lxml/trunk/CHANGES.txt (original)
+++ lxml/trunk/CHANGES.txt Sat Aug 9 11:21:46 2008
@@ -15,12 +15,6 @@
Bugs fixed
----------
-* Crash when parsing XSLT stylesheets in a thread and using them in
- another.
-
-* Encoding problem when including text with ElementInclude under
- Python 3.
-
Other changes
-------------
From scoder at codespeak.net Sat Aug 9 11:21:52 2008
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Sat, 9 Aug 2008 11:21:52 +0200 (CEST)
Subject: [Lxml-checkins] r57118 - in lxml/trunk: . doc
Message-ID: <20080809092152.BA521169F95@codespeak.net>
Author: scoder
Date: Sat Aug 9 11:21:52 2008
New Revision: 57118
Modified:
lxml/trunk/ (props changed)
lxml/trunk/CHANGES.txt
lxml/trunk/doc/xpathxslt.txt
Log:
r4726 at delle: sbehnel | 2008-08-08 18:02:10 +0200
doctest on XPath smart_strings option
Modified: lxml/trunk/CHANGES.txt
==============================================================================
--- lxml/trunk/CHANGES.txt (original)
+++ lxml/trunk/CHANGES.txt Sat Aug 9 11:21:52 2008
@@ -66,8 +66,8 @@
Features added
--------------
-* Smart strings can be switched off in XPath (``smart_string`` keyword
- option).
+* Smart strings can be switched off in XPath (``smart_strings``
+ keyword option).
* ``lxml.html.rewrite_links()`` strips links to work around documents
with whitespace in URL attributes.
Modified: lxml/trunk/doc/xpathxslt.txt
==============================================================================
--- lxml/trunk/doc/xpathxslt.txt (original)
+++ lxml/trunk/doc/xpathxslt.txt Sat Aug 9 11:21:52 2008
@@ -185,6 +185,29 @@
construct strings that do not have an origin. For them,
``getparent()`` will return None.
+There are certain cases where the smart string behaviour is
+undesirable. For example, it means that the tree will be kept alive
+by the string, which may have a considerable memory impact in the case
+that the string value is the only thing in the tree that is actually
+of interest. For these cases, you can deactivate the parental
+relationship using the keyword argument ``smart_strings``.
+
+ >>> root = etree.XML("TEXT")
+
+ >>> find_text = etree.XPath("//text()")
+ >>> text = find_text(root)[0]
+ >>> print(text)
+ TEXT
+ >>> print(text.getparent().text)
+ TEXT
+
+ >>> find_text = etree.XPath("//text()", smart_strings=False)
+ >>> text = find_text(root)[0]
+ >>> print(text)
+ TEXT
+ >>> hasattr(text, 'getparent')
+ False
+
Generating XPath expressions
----------------------------
From scoder at codespeak.net Sat Aug 9 11:21:56 2008
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Sat, 9 Aug 2008 11:21:56 +0200 (CEST)
Subject: [Lxml-checkins] r57119 - lxml/trunk
Message-ID: <20080809092156.022DB169FA8@codespeak.net>
Author: scoder
Date: Sat Aug 9 11:21:56 2008
New Revision: 57119
Added:
lxml/trunk/IDEAS.txt
Modified:
lxml/trunk/ (props changed)
lxml/trunk/TODO.txt
Log:
r4727 at delle: sbehnel | 2008-08-09 11:17:28 +0200
new text file IDEAS.txt about things to try out
Added: lxml/trunk/IDEAS.txt
==============================================================================
--- (empty file)
+++ lxml/trunk/IDEAS.txt Sat Aug 9 11:21:56 2008
@@ -0,0 +1,19 @@
+Things to try out when life permits
+===================================
+
+* generating XML using the ``with`` statement
+
+ http://comments.gmane.org/gmane.comp.python.general/579950?set_lines=100000
+
+* parse-time validation against a user provided DTD
+
+ * currently only works for XML Schema
+
+* somehow integrate RelaxNG compact notation (rnc versus rng)
+
+ * currently not supported by libxml2 (patch exists)
+
+* support subclassing XSLTAccessControl to provide custom per-URL
+ access check methods
+
+ * maybe custom resolvers are enough, or can be combined with this?
Modified: lxml/trunk/TODO.txt
==============================================================================
--- lxml/trunk/TODO.txt (original)
+++ lxml/trunk/TODO.txt Sat Aug 9 11:21:56 2008
@@ -46,20 +46,6 @@
attributes, not XML children
-Features
---------
-
-* RelaxNG compact notation (rnc versus rng) support. Currently not supported
- by libxml2 (patch exists)
-
-
-XSLT
-----
-
-* Support subclassing XSLTAccessControl to provide custom per-URL
- access check methods
-
-
Maybe
-----
From scoder at codespeak.net Sat Aug 9 11:29:18 2008
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Sat, 9 Aug 2008 11:29:18 +0200 (CEST)
Subject: [Lxml-checkins] r57121 - lxml/branch/lxml-2.1
Message-ID: <20080809092918.6A2E9169FA8@codespeak.net>
Author: scoder
Date: Sat Aug 9 11:29:17 2008
New Revision: 57121
Modified:
lxml/branch/lxml-2.1/CREDITS.txt
Log:
typo
Modified: lxml/branch/lxml-2.1/CREDITS.txt
==============================================================================
--- lxml/branch/lxml-2.1/CREDITS.txt (original)
+++ lxml/branch/lxml-2.1/CREDITS.txt Sat Aug 9 11:29:17 2008
@@ -17,7 +17,7 @@
Holger Joukl
bug reports, feedback and development on lxml.objectify
-Sidnei da Sivla
+Sidnei da Silva
official MS Windows builds
Marc-Antoine Parent
From scoder at codespeak.net Sat Aug 9 11:29:44 2008
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Sat, 9 Aug 2008 11:29:44 +0200 (CEST)
Subject: [Lxml-checkins] r57122 - lxml/branch/lxml-2.0
Message-ID: <20080809092944.3AB9B169FA8@codespeak.net>
Author: scoder
Date: Sat Aug 9 11:29:43 2008
New Revision: 57122
Modified:
lxml/branch/lxml-2.0/CREDITS.txt
Log:
typo
Modified: lxml/branch/lxml-2.0/CREDITS.txt
==============================================================================
--- lxml/branch/lxml-2.0/CREDITS.txt (original)
+++ lxml/branch/lxml-2.0/CREDITS.txt Sat Aug 9 11:29:43 2008
@@ -10,7 +10,7 @@
Holger Joukl - bug reports, feedback and development on lxml.objectify
-Sidnei da Sivla - official MS Windows builds
+Sidnei da Silva - official MS Windows builds
Marc-Antoine Parent - XPath extension function help and patches
From scoder at codespeak.net Sat Aug 9 11:30:05 2008
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Sat, 9 Aug 2008 11:30:05 +0200 (CEST)
Subject: [Lxml-checkins] r57123 - lxml/trunk
Message-ID: <20080809093005.33801169FA8@codespeak.net>
Author: scoder
Date: Sat Aug 9 11:30:04 2008
New Revision: 57123
Modified:
lxml/trunk/ (props changed)
lxml/trunk/CREDITS.txt
Log:
r4733 at delle: sbehnel | 2008-08-09 11:28:48 +0200
typo
Modified: lxml/trunk/CREDITS.txt
==============================================================================
--- lxml/trunk/CREDITS.txt (original)
+++ lxml/trunk/CREDITS.txt Sat Aug 9 11:30:04 2008
@@ -17,7 +17,7 @@
Holger Joukl
bug reports, feedback and development on lxml.objectify
-Sidnei da Sivla
+Sidnei da Silva
official MS Windows builds
Marc-Antoine Parent
From scoder at codespeak.net Sat Aug 9 11:30:09 2008
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Sat, 9 Aug 2008 11:30:09 +0200 (CEST)
Subject: [Lxml-checkins] r57124 - in lxml/trunk: . doc
Message-ID: <20080809093009.B7270169FAA@codespeak.net>
Author: scoder
Date: Sat Aug 9 11:30:09 2008
New Revision: 57124
Modified:
lxml/trunk/ (props changed)
lxml/trunk/doc/FAQ.txt
Log:
r4734 at delle: sbehnel | 2008-08-09 11:30:00 +0200
docs
Modified: lxml/trunk/doc/FAQ.txt
==============================================================================
--- lxml/trunk/doc/FAQ.txt (original)
+++ lxml/trunk/doc/FAQ.txt Sat Aug 9 11:30:09 2008
@@ -405,6 +405,19 @@
How can I contribute?
---------------------
+If you find something that you would like lxml to do (or do better),
+then please tell us about it on the `mailing list`_. Patches are
+always appreciated, especially when accompanied by unit tests and
+documentation (doctests would be great). See the ``tests``
+subdirectories in the lxml source tree (below the ``src`` directory)
+and the ReST_ `text files`_ in the ``doc`` directory.
+
+We also have a `list of missing features`_ that we would like to
+implement but didn't due to lack of time. If you find the time,
+patches are very welcome.
+
+.. _`list of missing features`: http://codespeak.net/svn/lxml/trunk/IDEAS.txt
+
Besides enhancing the code, there are a lot of places where you can help the
project and its user base. You can
@@ -426,9 +439,6 @@
or an idea how to make it more readable and accessible while you are reading
it, please send a comment to the `mailing list`_.
-.. _ReST: http://docutils.sourceforge.net/rst.html
-.. _`text files`: http://codespeak.net/svn/lxml/trunk/doc/
-
* help with the tutorial. A tutorial is the most important starting point for
new users, so it is important for us to provide an easy to understand guide
into lxml. As all documentation, the tutorial is work in progress, so we
@@ -440,6 +450,9 @@
you can try to write up a better description and send it to the `mailing
list`_.
+.. _ReST: http://docutils.sourceforge.net/rst.html
+.. _`text files`: http://codespeak.net/svn/lxml/trunk/doc/
+
Bugs
====
From scoder at codespeak.net Tue Aug 19 21:39:52 2008
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Tue, 19 Aug 2008 21:39:52 +0200 (CEST)
Subject: [Lxml-checkins] r57470 - lxml/trunk
Message-ID: <20080819193952.2E387169FD5@codespeak.net>
Author: scoder
Date: Tue Aug 19 21:39:51 2008
New Revision: 57470
Modified:
lxml/trunk/ (props changed)
lxml/trunk/IDEAS.txt
Log:
r4739 at delle: sbehnel | 2008-08-15 14:52:32 +0200
future todo
Modified: lxml/trunk/IDEAS.txt
==============================================================================
--- lxml/trunk/IDEAS.txt (original)
+++ lxml/trunk/IDEAS.txt Tue Aug 19 21:39:51 2008
@@ -1,6 +1,11 @@
Things to try out when life permits
===================================
+* zlib-based parsing/serialising of compressed in-memory data
+
+ * requires a libxml2 I/O OutputBuffer with appropriate I/O functions
+ that handle a zlib buffer
+
* generating XML using the ``with`` statement
http://comments.gmane.org/gmane.comp.python.general/579950?set_lines=100000
From scoder at codespeak.net Tue Aug 19 21:39:57 2008
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Tue, 19 Aug 2008 21:39:57 +0200 (CEST)
Subject: [Lxml-checkins] r57471 - in lxml/trunk: . src/lxml src/lxml/tests
Message-ID: <20080819193957.1A99C169FE0@codespeak.net>
Author: scoder
Date: Tue Aug 19 21:39:56 2008
New Revision: 57471
Modified:
lxml/trunk/ (props changed)
lxml/trunk/src/lxml/tests/test_xslt.py
lxml/trunk/src/lxml/xslt.pxi
Log:
r4740 at delle: sbehnel | 2008-08-15 21:51:44 +0200
fix XSLT document(relativeURL) resolving for stylesheets parsed from strings
Modified: lxml/trunk/src/lxml/tests/test_xslt.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_xslt.py (original)
+++ lxml/trunk/src/lxml/tests/test_xslt.py Tue Aug 19 21:39:56 2008
@@ -862,6 +862,57 @@
self.assertEquals(root[3].get("value"),
'B')
+ def test_xslt_resolver_url_building(self):
+ assertEquals = self.assertEquals
+ called = {'count' : 0}
+ expected_url = None
+ class TestResolver(etree.Resolver):
+ def resolve(self, url, id, context):
+ assertEquals(url, expected_url)
+ called['count'] += 1
+ return self.resolve_string('', context)
+
+ stylesheet_xml = _bytes("""\
+
+
+
+
+
+""")
+
+ parser = etree.XMLParser()
+ parser.resolvers.add(TestResolver())
+
+ # test without base_url => relative path only
+ expected_url = 'test.xml'
+ xslt = etree.XSLT(etree.XML(stylesheet_xml, parser))
+
+ self.assertEquals(called['count'], 0)
+ result = xslt(etree.XML(''))
+ self.assertEquals(called['count'], 1)
+
+ # now the same thing with a stylesheet base URL on the filesystem
+ called['count'] = 0
+ expected_url = os.path.join('MY', 'BASE', 'test.xml')
+ xslt = etree.XSLT(etree.XML(stylesheet_xml, parser,
+ base_url=os.path.join('MY', 'BASE', 'FILE')))
+
+ self.assertEquals(called['count'], 0)
+ result = xslt(etree.XML(''))
+ self.assertEquals(called['count'], 1)
+
+ # now the same thing with a stylesheet base URL
+ called['count'] = 0
+ expected_url = 'http://server.com/BASE/DIR/test.xml'
+ xslt = etree.XSLT(etree.XML(stylesheet_xml, parser,
+ base_url='http://server.com/BASE/DIR/FILE'))
+
+ self.assertEquals(called['count'], 0)
+ result = xslt(etree.XML(''))
+ self.assertEquals(called['count'], 1)
+
def test_xslt_document_parse_allow(self):
access_control = etree.XSLTAccessControl(read_file=True)
xslt = etree.XSLT(etree.parse(fileInTestDir("test-document.xslt")),
Modified: lxml/trunk/src/lxml/xslt.pxi
==============================================================================
--- lxml/trunk/src/lxml/xslt.pxi (original)
+++ lxml/trunk/src/lxml/xslt.pxi Tue Aug 19 21:39:56 2008
@@ -84,14 +84,9 @@
# delegate to the Python resolvers
try:
resolvers = context._resolvers
- if cstd.strncmp('string://', c_uri, 9) == 0:
- uri = _decodeFilename(c_uri + 9)
- if cstd.strncmp('string://', context._c_style_doc.URL, 9) != 0 and \
- cstd.strcmp('', context._c_style_doc.URL) != 0:
- # stylesheet URL known => make the target URL absolute
- uri = os_path_join(_decodeFilename(context._c_style_doc.URL), uri)
- else:
- uri = _decodeFilename(c_uri)
+ if cstd.strncmp('string://__STRING__XSLT__/', c_uri, 26) == 0:
+ c_uri += 26
+ uri = _decodeFilename(c_uri)
doc_ref = resolvers.resolve(uri, None, context)
c_doc = NULL
@@ -372,7 +367,7 @@
# make sure we always have a stylesheet URL
if c_doc.URL is NULL:
doc_url_utf = python.PyUnicode_AsASCIIString(
- u"string://__STRING__XSLT__%d" % id(self))
+ u"string://__STRING__XSLT__/%d.xslt" % id(self))
c_doc.URL = tree.xmlStrdup(_cstr(doc_url_utf))
self._error_log = _ErrorLog()
From scoder at codespeak.net Tue Aug 19 21:40:05 2008
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Tue, 19 Aug 2008 21:40:05 +0200 (CEST)
Subject: [Lxml-checkins] r57472 - in lxml/trunk: . src/lxml
Message-ID: <20080819194005.B49A1169FEE@codespeak.net>
Author: scoder
Date: Tue Aug 19 21:40:03 2008
New Revision: 57472
Modified:
lxml/trunk/ (props changed)
lxml/trunk/src/lxml/docloader.pxi
Log:
r4741 at delle: sbehnel | 2008-08-18 09:18:42 +0200
docstrings
Modified: lxml/trunk/src/lxml/docloader.pxi
==============================================================================
--- lxml/trunk/src/lxml/docloader.pxi (original)
+++ lxml/trunk/src/lxml/docloader.pxi Tue Aug 19 21:40:03 2008
@@ -42,9 +42,9 @@
Return a parsable string as input document.
- Pass data string and context as parameters.
-
- You can pass the source URL as 'base_url' keyword.
+ Pass data string and context as parameters. You can pass the
+ source URL or filename through the ``base_url`` keyword
+ argument.
"""
cdef _InputDocument doc_ref
doc_ref = _InputDocument()
@@ -59,7 +59,8 @@
Return the name of a parsable file as input document.
- Pass filename and context as parameters.
+ Pass filename and context as parameters. You can also pass a
+ URL with an HTTP, FTP or file target.
"""
cdef _InputDocument doc_ref
doc_ref = _InputDocument()
@@ -72,7 +73,12 @@
Return an open file-like object as input document.
- Pass open file and context as parameters.
+ Pass open file and context as parameters. You can pass the
+ base URL or filename of the file through the ``base_url``
+ keyword argument.
+
+ Note that using ``.resolve_filename()`` is more efficient,
+ especially in threaded environments.
"""
cdef _InputDocument doc_ref
try:
From scoder at codespeak.net Tue Aug 19 21:40:12 2008
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Tue, 19 Aug 2008 21:40:12 +0200 (CEST)
Subject: [Lxml-checkins] r57473 - in lxml/trunk: . src/lxml src/lxml/tests
Message-ID: <20080819194012.18D38169FFE@codespeak.net>
Author: scoder
Date: Tue Aug 19 21:40:10 2008
New Revision: 57473
Modified:
lxml/trunk/ (props changed)
lxml/trunk/CHANGES.txt
lxml/trunk/src/lxml/apihelpers.pxi
lxml/trunk/src/lxml/lxml.etree.pyx
lxml/trunk/src/lxml/tests/test_etree.py
Log:
r4742 at delle: sbehnel | 2008-08-19 21:35:29 +0200
try making file names of file-like objects absolute
Modified: lxml/trunk/CHANGES.txt
==============================================================================
--- lxml/trunk/CHANGES.txt (original)
+++ lxml/trunk/CHANGES.txt Tue Aug 19 21:40:10 2008
@@ -8,6 +8,11 @@
Features added
--------------
+* lxml.etree now tries to find the absolute path name of files when
+ parsing from a file-like object. This helps custom resolvers when
+ resolving relative URLs, as libxml2 can prepend them with the path
+ of the source document.
+
* New options for exclusive C14N and C14N without comments.
* Instantiating a custom Element classes creates a new Element.
Modified: lxml/trunk/src/lxml/apihelpers.pxi
==============================================================================
--- lxml/trunk/src/lxml/apihelpers.pxi (original)
+++ lxml/trunk/src/lxml/apihelpers.pxi Tue Aug 19 21:40:10 2008
@@ -1381,16 +1381,19 @@
Returns None if not a file object.
"""
# file instances have a name attribute
- filename = getattr3(source, u'name', None)
- if filename is not None:
- return filename
- # gzip file instances have a filename attribute
- filename = getattr3(source, u'filename', None)
- if filename is not None:
- return filename
+ if isinstance(source, file):
+ return os_path_abspath(source.name)
# urllib2 provides a geturl() method
geturl = getattr3(source, u'geturl', None)
if geturl is not None:
return geturl()
+ # gzip file instances have a filename attribute
+ filename = getattr3(source, u'filename', None)
+ if filename is not None:
+ return os_path_abspath(filename)
+ # this is mostly for backwards compatibility
+ filename = getattr3(source, u'name', None)
+ if filename is not None:
+ return os_path_abspath(filename)
# can't determine filename
return None
Modified: lxml/trunk/src/lxml/lxml.etree.pyx
==============================================================================
--- lxml/trunk/src/lxml/lxml.etree.pyx (original)
+++ lxml/trunk/src/lxml/lxml.etree.pyx Tue Aug 19 21:40:10 2008
@@ -32,8 +32,8 @@
del __builtin__
-cdef object os_path_join
-from os.path import join as os_path_join
+cdef object os_path_abspath
+from os.path import abspath as os_path_abspath
cdef object BytesIO, StringIO
try:
Modified: lxml/trunk/src/lxml/tests/test_etree.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_etree.py (original)
+++ lxml/trunk/src/lxml/tests/test_etree.py Tue Aug 19 21:40:10 2008
@@ -664,6 +664,29 @@
self.assertEquals(
root[0].attrib, {'default': 'valueB'})
+ def test_resolve_filename_dtd_relative(self):
+ parse = self.etree.parse
+ parser = self.etree.XMLParser(attribute_defaults=True)
+ assertEqual = self.assertEqual
+ test_url = _str("__nosuch.dtd")
+
+ class MyResolver(self.etree.Resolver):
+ def resolve(self, url, id, context):
+ assertEqual(url, fileInTestDir(test_url))
+ return self.resolve_filename(
+ fileInTestDir('test.dtd'), context)
+
+ parser.resolvers.add(MyResolver())
+
+ xml = _str('') % test_url
+ tree = parse(StringIO(xml), parser,
+ base_url=fileInTestDir('__test.xml'))
+ root = tree.getroot()
+ self.assertEquals(
+ root.attrib, {'default': 'valueA'})
+ self.assertEquals(
+ root[0].attrib, {'default': 'valueB'})
+
def test_resolve_empty(self):
parse = self.etree.parse
parser = self.etree.XMLParser(load_dtd=True)
From scoder at codespeak.net Tue Aug 19 22:51:38 2008
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Tue, 19 Aug 2008 22:51:38 +0200 (CEST)
Subject: [Lxml-checkins] r57478 - in lxml/branch/lxml-2.1: . src/lxml
src/lxml/tests
Message-ID: <20080819205138.4168616A017@codespeak.net>
Author: scoder
Date: Tue Aug 19 22:51:34 2008
New Revision: 57478
Modified:
lxml/branch/lxml-2.1/CHANGES.txt
lxml/branch/lxml-2.1/src/lxml/apihelpers.pxi
lxml/branch/lxml-2.1/src/lxml/lxml.etree.pyx
lxml/branch/lxml-2.1/src/lxml/tests/test_etree.py
Log:
trunk merge: try finding the absolute path name of file-like objects
Modified: lxml/branch/lxml-2.1/CHANGES.txt
==============================================================================
--- lxml/branch/lxml-2.1/CHANGES.txt (original)
+++ lxml/branch/lxml-2.1/CHANGES.txt Tue Aug 19 22:51:34 2008
@@ -2,6 +2,24 @@
lxml changelog
==============
+Under development
+=================
+
+Features added
+--------------
+
+* lxml.etree now tries to find the absolute path name of files when
+ parsing from a file-like object. This helps custom resolvers when
+ resolving relative URLs, as libxml2 can prepend them with the path
+ of the source document.
+
+Bugs fixed
+----------
+
+Other changes
+-------------
+
+
2.1.1 (2008-07-24)
==================
Modified: lxml/branch/lxml-2.1/src/lxml/apihelpers.pxi
==============================================================================
--- lxml/branch/lxml-2.1/src/lxml/apihelpers.pxi (original)
+++ lxml/branch/lxml-2.1/src/lxml/apihelpers.pxi Tue Aug 19 22:51:34 2008
@@ -1351,16 +1351,19 @@
Returns None if not a file object.
"""
# file instances have a name attribute
- filename = getattr3(source, u'name', None)
- if filename is not None:
- return filename
- # gzip file instances have a filename attribute
- filename = getattr3(source, u'filename', None)
- if filename is not None:
- return filename
+ if isinstance(source, file):
+ return os_path_abspath(source.name)
# urllib2 provides a geturl() method
geturl = getattr3(source, u'geturl', None)
if geturl is not None:
return geturl()
+ # gzip file instances have a filename attribute
+ filename = getattr3(source, u'filename', None)
+ if filename is not None:
+ return os_path_abspath(filename)
+ # this is mostly for backwards compatibility
+ filename = getattr3(source, u'name', None)
+ if filename is not None:
+ return os_path_abspath(filename)
# can't determine filename
return None
Modified: lxml/branch/lxml-2.1/src/lxml/lxml.etree.pyx
==============================================================================
--- lxml/branch/lxml-2.1/src/lxml/lxml.etree.pyx (original)
+++ lxml/branch/lxml-2.1/src/lxml/lxml.etree.pyx Tue Aug 19 22:51:34 2008
@@ -32,6 +32,8 @@
del __builtin__
+cdef object os_path_abspath
+from os.path import abspath as os_path_abspath
cdef object os_path_join
from os.path import join as os_path_join
Modified: lxml/branch/lxml-2.1/src/lxml/tests/test_etree.py
==============================================================================
--- lxml/branch/lxml-2.1/src/lxml/tests/test_etree.py (original)
+++ lxml/branch/lxml-2.1/src/lxml/tests/test_etree.py Tue Aug 19 22:51:34 2008
@@ -664,6 +664,29 @@
self.assertEquals(
root[0].attrib, {'default': 'valueB'})
+ def test_resolve_filename_dtd_relative(self):
+ parse = self.etree.parse
+ parser = self.etree.XMLParser(attribute_defaults=True)
+ assertEqual = self.assertEqual
+ test_url = _str("__nosuch.dtd")
+
+ class MyResolver(self.etree.Resolver):
+ def resolve(self, url, id, context):
+ assertEqual(url, fileInTestDir(test_url))
+ return self.resolve_filename(
+ fileInTestDir('test.dtd'), context)
+
+ parser.resolvers.add(MyResolver())
+
+ xml = _str('') % test_url
+ tree = parse(StringIO(xml), parser,
+ base_url=fileInTestDir('__test.xml'))
+ root = tree.getroot()
+ self.assertEquals(
+ root.attrib, {'default': 'valueA'})
+ self.assertEquals(
+ root[0].attrib, {'default': 'valueB'})
+
def test_resolve_empty(self):
parse = self.etree.parse
parser = self.etree.XMLParser(load_dtd=True)
From jholg at codespeak.net Thu Aug 21 08:21:49 2008
From: jholg at codespeak.net (jholg at codespeak.net)
Date: Thu, 21 Aug 2008 08:21:49 +0200 (CEST)
Subject: [Lxml-checkins] r57527 - in lxml/trunk/src/lxml: . tests
Message-ID: <20080821062149.9299C16A148@codespeak.net>
Author: jholg
Date: Thu Aug 21 08:21:48 2008
New Revision: 57527
Modified:
lxml/trunk/src/lxml/objectpath.pxi
lxml/trunk/src/lxml/tests/test_objectify.py
Log:
ObjectPath(...)(root, default) now returns default even if
root element does not match for absolute paths.
Modified: lxml/trunk/src/lxml/objectpath.pxi
==============================================================================
--- lxml/trunk/src/lxml/objectpath.pxi (original)
+++ lxml/trunk/src/lxml/objectpath.pxi Thu Aug 21 08:21:48 2008
@@ -199,9 +199,12 @@
if c_href is NULL or c_href[0] == c'\0':
c_href = tree._getNs(c_node)
if not cetree.tagMatches(c_node, c_href, c_name):
- raise ValueError, \
- u"root element does not match: need %s, got %s" % \
- (cetree.namespacedNameFromNsName(c_href, c_name), root.tag)
+ if use_default:
+ return default_value
+ else:
+ raise ValueError, \
+ u"root element does not match: need %s, got %s" % \
+ (cetree.namespacedNameFromNsName(c_href, c_name), root.tag)
while c_node is not NULL:
c_path_len = c_path_len - 1
Modified: lxml/trunk/src/lxml/tests/test_objectify.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_objectify.py (original)
+++ lxml/trunk/src/lxml/tests/test_objectify.py Thu Aug 21 08:21:48 2008
@@ -1942,6 +1942,21 @@
root = self.XML(xml_str)
path = objectify.ObjectPath( "root.c1.c99" )
self.assertRaises(AttributeError, path, root)
+
+ def test_object_path_default_absolute(self):
+ root = self.XML(xml_str)
+ path = objectify.ObjectPath( "root.c1.c99" )
+ self.assertEquals(None, path(root, None))
+ path = objectify.ObjectPath( "root.c99.c2" )
+ self.assertEquals(None, path(root, None))
+ path = objectify.ObjectPath( "notroot.c99.c2" )
+ self.assertEquals(None, path(root, None))
+
+ def test_object_path_default_relative(self):
+ root = self.XML(xml_str)
+ path = objectify.ObjectPath( ".c1.c99" )
+ self.assertEquals(None, path(root, None))
+ path = objectify.ObjectPath( ".c99.c2" )
self.assertEquals(None, path(root, None))
def test_object_path_syntax(self):
@@ -1952,6 +1967,12 @@
path = objectify.ObjectPath(" root.{objectified} c1.c2 [ 0 ] ")
self.assertEquals(root.c1.c2.text, path(root).text)
+ def test_object_path_fail_parse_empty(self):
+ self.assertRaises(ValueError, objectify.ObjectPath, "")
+
+ def test_object_path_fail_parse_empty_list(self):
+ self.assertRaises(ValueError, objectify.ObjectPath, [])
+
def test_object_path_hasattr(self):
root = self.XML(xml_str)
path = objectify.ObjectPath( "root" )
From scoder at codespeak.net Sat Aug 23 14:03:55 2008
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Sat, 23 Aug 2008 14:03:55 +0200 (CEST)
Subject: [Lxml-checkins] r57597 - in lxml/trunk: . src/lxml
Message-ID: <20080823120355.BCD6316A247@codespeak.net>
Author: scoder
Date: Sat Aug 23 14:03:51 2008
New Revision: 57597
Modified:
lxml/trunk/ (props changed)
lxml/trunk/src/lxml/lxml.etree.pyx
Log:
r4747 at delle: sbehnel | 2008-08-23 13:54:34 +0200
doc fix
Modified: lxml/trunk/src/lxml/lxml.etree.pyx
==============================================================================
--- lxml/trunk/src/lxml/lxml.etree.pyx (original)
+++ lxml/trunk/src/lxml/lxml.etree.pyx Sat Aug 23 14:03:51 2008
@@ -1542,7 +1542,8 @@
property docinfo:
u"""Information about the document provided by parser and DTD. This
value is only defined for ElementTree objects based on the root node
- of a parsed document (e.g. those returned by the parse functions).
+ of a parsed document (e.g. those returned by the parse functions),
+ not for trees that were built manually.
"""
def __get__(self):
self._assertHasRoot()
From scoder at codespeak.net Sat Aug 23 14:04:06 2008
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Sat, 23 Aug 2008 14:04:06 +0200 (CEST)
Subject: [Lxml-checkins] r57598 - in lxml/trunk: . src/lxml
Message-ID: <20080823120406.6941016A24A@codespeak.net>
Author: scoder
Date: Sat Aug 23 14:04:04 2008
New Revision: 57598
Modified:
lxml/trunk/ (props changed)
lxml/trunk/src/lxml/apihelpers.pxi
Log:
r4748 at delle: sbehnel | 2008-08-23 14:03:44 +0200
Py3k fix
Modified: lxml/trunk/src/lxml/apihelpers.pxi
==============================================================================
--- lxml/trunk/src/lxml/apihelpers.pxi (original)
+++ lxml/trunk/src/lxml/apihelpers.pxi Sat Aug 23 14:04:04 2008
@@ -1381,19 +1381,16 @@
Returns None if not a file object.
"""
# file instances have a name attribute
- if isinstance(source, file):
- return os_path_abspath(source.name)
+ filename = getattr3(source, u'name', None)
+ if filename is not None:
+ return os_path_abspath(filename)
# urllib2 provides a geturl() method
geturl = getattr3(source, u'geturl', None)
if geturl is not None:
return geturl()
- # gzip file instances have a filename attribute
+ # gzip file instances have a filename attribute (before Py3k)
filename = getattr3(source, u'filename', None)
if filename is not None:
return os_path_abspath(filename)
- # this is mostly for backwards compatibility
- filename = getattr3(source, u'name', None)
- if filename is not None:
- return os_path_abspath(filename)
# can't determine filename
return None