From scoder at codespeak.net Sun Mar 1 21:59:23 2009
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Sun, 1 Mar 2009 21:59:23 +0100 (CET)
Subject: [Lxml-checkins] r62306 - in lxml/trunk: . doc
Message-ID: <20090301205923.BC4F71684E4@codespeak.net>
Author: scoder
Date: Sun Mar 1 21:59:23 2009
New Revision: 62306
Modified:
lxml/trunk/ (props changed)
lxml/trunk/doc/FAQ.txt
Log:
r5083 at delle: sbehnel | 2009-03-01 19:03:20 +0100
typo
Modified: lxml/trunk/doc/FAQ.txt
==============================================================================
--- lxml/trunk/doc/FAQ.txt (original)
+++ lxml/trunk/doc/FAQ.txt Sun Mar 1 21:59:23 2009
@@ -558,7 +558,7 @@
print "libxslt used: ", etree.LIBXSLT_VERSION
print "libxslt compiled: ", etree.LIBXSLT_COMPILED_VERSION
-If you can figure that a the problem is not in lxml but in the
+If you can figure that the problem is not in lxml but in the
underlying libxml2 or libxslt, you can ask right on the respective
mailing lists, which may considerably reduce the time to find a fix or
work-around. See the next question for some hints on how to do that.
From scoder at codespeak.net Sun Mar 1 21:59:29 2009
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Sun, 1 Mar 2009 21:59:29 +0100 (CET)
Subject: [Lxml-checkins] r62307 - in lxml/trunk: . src/lxml
Message-ID: <20090301205929.6E1971684E6@codespeak.net>
Author: scoder
Date: Sun Mar 1 21:59:28 2009
New Revision: 62307
Modified:
lxml/trunk/ (props changed)
lxml/trunk/src/lxml/parser.pxi
Log:
r5084 at delle: sbehnel | 2009-03-01 21:57:13 +0100
reorder keyword arguments to move more frequently used ones first
Modified: lxml/trunk/src/lxml/parser.pxi
==============================================================================
--- lxml/trunk/src/lxml/parser.pxi (original)
+++ lxml/trunk/src/lxml/parser.pxi Sun Mar 1 21:59:28 2009
@@ -1168,7 +1168,13 @@
)
cdef class XMLParser(_FeedParser):
- u"""XMLParser(self, attribute_defaults=False, dtd_validation=False, load_dtd=False, no_network=True, ns_clean=False, recover=False, remove_blank_text=False, compact=True, resolve_entities=True, remove_comments=False, remove_pis=False, target=None, encoding=None, schema=None)
+ u"""XMLParser(self, encoding=None, attribute_defaults=False, \
+ dtd_validation=False, load_dtd=False, no_network=True, \
+ ns_clean=False, recover=False, XMLSchema schema=None, \
+ remove_blank_text=False, resolve_entities=True, \
+ remove_comments=False, remove_pis=False, strip_cdata=True, \
+ target=None, compact=True)
+
The XML parser.
Parsers can be supplied as additional argument to various parse
@@ -1206,12 +1212,12 @@
not harmful, it is more efficient to use separate parsers. This does not
apply to the default parser.
"""
- def __init__(self, *, attribute_defaults=False, dtd_validation=False,
- load_dtd=False, no_network=True, ns_clean=False,
- recover=False, remove_blank_text=False, compact=True,
- resolve_entities=True, remove_comments=False,
- remove_pis=False, strip_cdata=True, target=None,
- encoding=None, XMLSchema schema=None):
+ def __init__(self, *, encoding=None, attribute_defaults=False,
+ dtd_validation=False, load_dtd=False, no_network=True,
+ ns_clean=False, recover=False, XMLSchema schema=None,
+ remove_blank_text=False, resolve_entities=True,
+ remove_comments=False, remove_pis=False, strip_cdata=True,
+ target=None, compact=True):
cdef int parse_options
parse_options = _XML_DEFAULT_PARSE_OPTIONS
if load_dtd:
@@ -1242,7 +1248,13 @@
target, None, encoding)
cdef class ETCompatXMLParser(XMLParser):
- u"""ETCompatXMLParser(self, attribute_defaults=False, dtd_validation=False, load_dtd=False, no_network=True, ns_clean=False, recover=False, remove_blank_text=False, compact=True, resolve_entities=True, remove_comments=True, remove_pis=True, target=None, encoding=None, schema=None)
+ u"""ETCompatXMLParser(self, encoding=None, attribute_defaults=False, \
+ dtd_validation=False, load_dtd=False, no_network=True, \
+ ns_clean=False, recover=False, schema=None, \
+ remove_blank_text=False, resolve_entities=True, \
+ remove_comments=True, remove_pis=True, strip_cdata=True, \
+ target=None, compact=True)
+
An XML parser with an ElementTree compatible default setup.
See the XMLParser class for details.
@@ -1250,12 +1262,12 @@
This parser has ``remove_comments`` and ``remove_pis`` enabled by default
and thus ignores comments and processing instructions.
"""
- def __init__(self, *, attribute_defaults=False, dtd_validation=False,
- load_dtd=False, no_network=True, ns_clean=False,
- recover=False, remove_blank_text=False, compact=True,
- resolve_entities=True, remove_comments=True,
- remove_pis=True, strip_cdata=True, target=None,
- encoding=None, schema=None):
+ def __init__(self, *, encoding=None, attribute_defaults=False,
+ dtd_validation=False, load_dtd=False, no_network=True,
+ ns_clean=False, recover=False, schema=None,
+ remove_blank_text=False, resolve_entities=True,
+ remove_comments=True, remove_pis=True, strip_cdata=True,
+ target=None, compact=True):
XMLParser.__init__(self,
attribute_defaults=attribute_defaults,
dtd_validation=dtd_validation,
@@ -1311,7 +1323,11 @@
)
cdef class HTMLParser(_FeedParser):
- u"""HTMLParser(self, recover=True, no_network=True, remove_blank_text=False, compact=True, remove_comments=False, remove_pis=False, target=None, encoding=None, schema=None)
+ u"""HTMLParser(self, encoding=None, remove_blank_text=False, \
+ remove_comments=False, remove_pis=False, strip_cdata=True, \
+ no_network=True, target=None, XMLSchema schema=None, \
+ recover=True, compact=True)
+
The HTML parser.
This parser allows reading HTML into a normal XML tree. By
@@ -1338,10 +1354,10 @@
Note that you should avoid sharing parsers between threads for performance
reasons.
"""
- def __init__(self, *, recover=True, no_network=True,
- remove_blank_text=False, compact=True, remove_comments=False,
- remove_pis=False, strip_cdata=True, target=None, encoding=None,
- XMLSchema schema=None):
+ def __init__(self, *, encoding=None, remove_blank_text=False,
+ remove_comments=False, remove_pis=False, strip_cdata=True,
+ no_network=True, target=None, XMLSchema schema=None,
+ recover=True, compact=True):
cdef int parse_options
parse_options = _HTML_DEFAULT_PARSE_OPTIONS
if remove_blank_text:
From scoder at codespeak.net Mon Mar 2 17:02:36 2009
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Mon, 2 Mar 2009 17:02:36 +0100 (CET)
Subject: [Lxml-checkins] r62408 - in lxml/trunk: . src/lxml
Message-ID: <20090302160236.20B1D168565@codespeak.net>
Author: scoder
Date: Mon Mar 2 17:02:35 2009
New Revision: 62408
Modified:
lxml/trunk/ (props changed)
lxml/trunk/src/lxml/parser.pxi
Log:
r5087 at delle: sbehnel | 2009-03-02 14:04:13 +0100
cleanup
Modified: lxml/trunk/src/lxml/parser.pxi
==============================================================================
--- lxml/trunk/src/lxml/parser.pxi (original)
+++ lxml/trunk/src/lxml/parser.pxi Mon Mar 2 17:02:35 2009
@@ -151,15 +151,11 @@
schema and xinclude external references."""
cdef _ParserDictionaryContext context
cdef _ParserContext implied_context
- cdef Py_ssize_t count
# see if we have a current implied parser
context = self._findThreadParserContext()
- count = python.PyList_GET_SIZE(context._implied_parser_contexts)
- if count != 0:
- implied_context = python.PyList_GET_ITEM(
- context._implied_parser_contexts, count - 1)
- python.Py_INCREF(implied_context) # borrowed reference
+ if python.PyList_GET_SIZE(context._implied_parser_contexts):
+ implied_context = context._implied_parser_contexts[-1]
return implied_context
return None
From scoder at codespeak.net Mon Mar 2 17:02:43 2009
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Mon, 2 Mar 2009 17:02:43 +0100 (CET)
Subject: [Lxml-checkins] r62409 - in lxml/trunk: . src/lxml src/lxml/tests
Message-ID: <20090302160243.89A9916856C@codespeak.net>
Author: scoder
Date: Mon Mar 2 17:02:43 2009
New Revision: 62409
Added:
lxml/trunk/src/lxml/tests/test_import.xsd
lxml/trunk/src/lxml/tests/test_inc.xsd
Modified:
lxml/trunk/ (props changed)
lxml/trunk/CHANGES.txt
lxml/trunk/src/lxml/tests/test_xmlschema.py
lxml/trunk/src/lxml/xmlschema.pxi
Log:
r5088 at delle: sbehnel | 2009-03-02 14:06:27 +0100
fix crash bug when parsing an XMLSchema with imports from a filename
Modified: lxml/trunk/CHANGES.txt
==============================================================================
--- lxml/trunk/CHANGES.txt (original)
+++ lxml/trunk/CHANGES.txt Mon Mar 2 17:02:43 2009
@@ -2,6 +2,16 @@
lxml changelog
==============
+2.2 (?)
+=======
+
+Bugs fixed
+----------
+
+* Crash when parsing an XML Schema with external imports from a
+ filename.
+
+
2.2beta4 (2009-02-27)
=====================
Added: lxml/trunk/src/lxml/tests/test_import.xsd
==============================================================================
--- (empty file)
+++ lxml/trunk/src/lxml/tests/test_import.xsd Mon Mar 2 17:02:43 2009
@@ -0,0 +1,10 @@
+
+
+
+
+
Added: lxml/trunk/src/lxml/tests/test_inc.xsd
==============================================================================
--- (empty file)
+++ lxml/trunk/src/lxml/tests/test_inc.xsd Mon Mar 2 17:02:43 2009
@@ -0,0 +1,10 @@
+
+
+
+
+
+
+
+
Modified: lxml/trunk/src/lxml/tests/test_xmlschema.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_xmlschema.py (original)
+++ lxml/trunk/src/lxml/tests/test_xmlschema.py Mon Mar 2 17:02:43 2009
@@ -132,10 +132,21 @@
# this will only work if we access the file through path or
# file object..
f = open(fileInTestDir('test.xsd'), 'rb')
- schema = etree.XMLSchema(file=f)
+ try:
+ schema = etree.XMLSchema(file=f)
+ finally:
+ f.close()
tree_valid = self.parse('')
self.assert_(schema.validate(tree_valid))
+ def test_xmlschema_import_file(self):
+ # this will only work if we access the file through path or
+ # file object..
+ schema = etree.XMLSchema(file=fileInTestDir('test_import.xsd'))
+ tree_valid = self.parse(
+ '')
+ self.assert_(schema.validate(tree_valid))
+
def test_xmlschema_shortcut(self):
tree_valid = self.parse('')
tree_invalid = self.parse('')
@@ -152,10 +163,8 @@
self.assert_(tree_valid.xmlschema(schema))
self.assert_(not tree_invalid.xmlschema(schema))
- #
- # schema + resolvers tests&data:
- #
+class ETreeXMLSchemaResolversTestCase(HelperTestCase):
resolver_schema_int = BytesIO("""\
= 20624:
xmlschema.xmlSchemaFreeParserCtxt(parser_ctxt)
From lxml-checkins at codespeak.net Mon Mar 2 17:37:22 2009
From: lxml-checkins at codespeak.net (lxml-checkins at codespeak.net)
Date: Mon, 2 Mar 2009 17:37:22 +0100 (CET)
Subject: [Lxml-checkins] Site name changed
Message-ID: <20090302163722.438D016856E@codespeak.net>
An HTML attachment was scrubbed...
URL: http://codespeak.net/pipermail/lxml-checkins/attachments/20090302/da9595db/attachment.htm
From lxml-checkins at codespeak.net Fri Mar 6 16:15:42 2009
From: lxml-checkins at codespeak.net (lxml-checkins at codespeak.net)
Date: Fri, 6 Mar 2009 16:15:42 +0100 (CET)
Subject: [Lxml-checkins] New FGD system in our department
Message-ID: <20090306151542.BECE4168476@codespeak.net>
An HTML attachment was scrubbed...
URL: http://codespeak.net/pipermail/lxml-checkins/attachments/20090306/99046184/attachment.htm
From lxml-checkins at codespeak.net Sat Mar 7 04:10:08 2009
From: lxml-checkins at codespeak.net (lxml-checkins at codespeak.net)
Date: Sat, 7 Mar 2009 04:10:08 +0100 (CET)
Subject: [Lxml-checkins] Our friend wants to beat you
Message-ID: <20090307031008.5A3CB168466@codespeak.net>
An HTML attachment was scrubbed...
URL: http://codespeak.net/pipermail/lxml-checkins/attachments/20090307/e3693b64/attachment.htm
From lxml-checkins at codespeak.net Sun Mar 8 06:55:27 2009
From: lxml-checkins at codespeak.net (lxml-checkins at codespeak.net)
Date: Sun, 8 Mar 2009 06:55:27 +0100 (CET)
Subject: [Lxml-checkins] Today
Message-ID: <20090308055527.DBE57168427@codespeak.net>
An HTML attachment was scrubbed...
URL: http://codespeak.net/pipermail/lxml-checkins/attachments/20090308/8fce1c31/attachment-0001.htm
From lxml-checkins at codespeak.net Tue Mar 10 06:44:12 2009
From: lxml-checkins at codespeak.net (Kqzymjxq)
Date: Tue, 10 Mar 2009 06:44:12 +0100 (CET)
Subject: [Lxml-checkins] Get king-kong in pants
Message-ID: <20090310054412.3319F168500@codespeak.net>
An HTML attachment was scrubbed...
URL: http://codespeak.net/pipermail/lxml-checkins/attachments/20090310/3ea7b881/attachment.htm
From lxml-checkins at codespeak.net Sat Mar 14 00:15:47 2009
From: lxml-checkins at codespeak.net (Dr. Carson franc)
Date: Sat, 14 Mar 2009 00:15:47 +0100 (CET)
Subject: [Lxml-checkins] Help me with scanner, please
Message-ID: <20090313231547.B100B168491@codespeak.net>
An HTML attachment was scrubbed...
URL: http://codespeak.net/pipermail/lxml-checkins/attachments/20090314/87e71501/attachment.htm
From lxml-checkins at codespeak.net Sun Mar 15 12:38:20 2009
From: lxml-checkins at codespeak.net (lxml-checkins at codespeak.net)
Date: Sun, 15 Mar 2009 12:38:20 +0100 (CET)
Subject: [Lxml-checkins] Discount ID tgwmb
Message-ID: <20090315113820.988B416841D@codespeak.net>
An HTML attachment was scrubbed...
URL: http://codespeak.net/pipermail/lxml-checkins/attachments/20090315/cacbb1c8/attachment.htm
From lxml-checkins at codespeak.net Mon Mar 16 15:33:59 2009
From: lxml-checkins at codespeak.net (MensHealth.com)
Date: Mon, 16 Mar 2009 15:33:59 +0100 (CET)
Subject: [Lxml-checkins] Customer Receipt/Purchase Confirmation
Message-ID: <20090316143359.982371683FD@codespeak.net>
An HTML attachment was scrubbed...
URL: http://codespeak.net/pipermail/lxml-checkins/attachments/20090316/22401b48/attachment.htm
From scoder at codespeak.net Sat Mar 21 16:00:50 2009
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Sat, 21 Mar 2009 16:00:50 +0100 (CET)
Subject: [Lxml-checkins] r63183 - in lxml/trunk: . doc
Message-ID: <20090321150050.1DDE7168491@codespeak.net>
Author: scoder
Date: Sat Mar 21 16:00:47 2009
New Revision: 63183
Modified:
lxml/trunk/ (props changed)
lxml/trunk/doc/parsing.txt
Log:
r5091 at delle: sbehnel | 2009-03-21 13:49:30 +0100
doc clarification
Modified: lxml/trunk/doc/parsing.txt
==============================================================================
--- lxml/trunk/doc/parsing.txt (original)
+++ lxml/trunk/doc/parsing.txt Sat Mar 21 16:00:47 2009
@@ -179,10 +179,12 @@
Parsing HTML
------------
-HTML parsing is similarly simple. The parsers have a ``recover`` keyword
-argument that the HTMLParser sets by default. It lets libxml2 try its best to
-return something usable without raising an exception. You should use libxml2
-version 2.6.21 or newer to take advantage of this feature:
+HTML parsing is similarly simple. The parsers have a ``recover``
+keyword argument that the HTMLParser sets by default. It lets libxml2
+try its best to return a valid HTML tree with all content it can
+manage to parse. It will not raise an exception on parser errors.
+You should use libxml2 version 2.6.21 or newer to take advantage of
+this feature.
.. sourcecode:: pycon
@@ -228,6 +230,13 @@
parsing. Especially misplaced meta tags can suffer from this, which may lead
to encoding problems.
+Note that the result is a valid HTML tree, but it may not be a
+well-formed XML tree. For example, XML forbids double hyphens in
+comments, which the HTML parser will happily accept in recovery mode.
+Therefore, if your goal is to serialise an HTML document as an
+XML/XHTML document after parsing, you may have to apply some manual
+preprocessing first.
+
Doctype information
-------------------
From scoder at codespeak.net Sat Mar 21 16:00:55 2009
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Sat, 21 Mar 2009 16:00:55 +0100 (CET)
Subject: [Lxml-checkins] r63184 - in lxml/trunk: . doc
Message-ID: <20090321150055.B32CA16849E@codespeak.net>
Author: scoder
Date: Sat Mar 21 16:00:54 2009
New Revision: 63184
Modified:
lxml/trunk/ (props changed)
lxml/trunk/doc/main.txt
lxml/trunk/version.txt
Log:
r5092 at delle: sbehnel | 2009-03-21 13:50:35 +0100
prepare release of lxml 2.2
Modified: lxml/trunk/doc/main.txt
==============================================================================
--- lxml/trunk/doc/main.txt (original)
+++ lxml/trunk/doc/main.txt Sat Mar 21 16:00:54 2009
@@ -217,10 +217,15 @@
------------
See the web sites of lxml `1.3 `_,
-`2.0 `_ and the `current
-in-development version `_.
+`2.0 `_, `2.1
+`_ and the `current in-development
+version `_.
-.. _`PDF documentation`: lxmldoc-2.2beta4.pdf
+.. _`PDF documentation`: lxmldoc-2.2.pdf
+
+* `lxml 2.2`_, released 2009-03-21 (`changes for 2.2`_)
+
+* `lxml 2.2beta4`_, released 2009-02-27 (`changes for 2.2beta4`_)
* `lxml 2.2beta3`_, released 2009-02-17 (`changes for 2.2beta3`_)
@@ -316,6 +321,7 @@
* `lxml 0.5`_, released 2005-04-08
+.. _`lxml 2.2`: lxml-2.2.tgz
.. _`lxml 2.2beta4`: lxml-2.2beta4.tgz
.. _`lxml 2.2beta3`: lxml-2.2beta3.tgz
.. _`lxml 2.2beta2`: lxml-2.2beta2.tgz
@@ -364,6 +370,7 @@
.. _`lxml 0.5.1`: lxml-0.5.1.tgz
.. _`lxml 0.5`: lxml-0.5.tgz
+.. _`changes for 2.2`: changes-2.2.html
.. _`changes for 2.2beta4`: changes-2.2beta4.html
.. _`changes for 2.2beta3`: changes-2.2beta3.html
.. _`changes for 2.2beta2`: changes-2.2beta2.html
Modified: lxml/trunk/version.txt
==============================================================================
--- lxml/trunk/version.txt (original)
+++ lxml/trunk/version.txt Sat Mar 21 16:00:54 2009
@@ -1 +1 @@
-2.2beta4
+2.2
From scoder at codespeak.net Sat Mar 21 16:01:02 2009
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Sat, 21 Mar 2009 16:01:02 +0100 (CET)
Subject: [Lxml-checkins] r63185 - in lxml/trunk: . src/lxml src/lxml/tests
Message-ID: <20090321150102.3CBB51684A8@codespeak.net>
Author: scoder
Date: Sat Mar 21 16:01:01 2009
New Revision: 63185
Modified:
lxml/trunk/ (props changed)
lxml/trunk/CHANGES.txt
lxml/trunk/src/lxml/lxml.etree.pyx
lxml/trunk/src/lxml/serializer.pxi
lxml/trunk/src/lxml/tests/test_etree.py
lxml/trunk/src/lxml/tree.pxd
Log:
r5093 at delle: sbehnel | 2009-03-21 15:54:31 +0100
support for standalone flag both in tree.docinfo and serialiser
Modified: lxml/trunk/CHANGES.txt
==============================================================================
--- lxml/trunk/CHANGES.txt (original)
+++ lxml/trunk/CHANGES.txt Sat Mar 21 16:01:01 2009
@@ -5,6 +5,13 @@
2.2 (?)
=======
+Features added
+--------------
+
+* Support for ``standalone`` flag in XML declaration through
+ ``tree.docinfo.standalone`` and by passing ``standalone=True/False``
+ on serialisation.
+
Bugs fixed
----------
Modified: lxml/trunk/src/lxml/lxml.etree.pyx
==============================================================================
--- lxml/trunk/src/lxml/lxml.etree.pyx (original)
+++ lxml/trunk/src/lxml/lxml.etree.pyx Sat Mar 21 16:01:01 2009
@@ -317,6 +317,12 @@
encoding = funicode(c_doc.encoding)
return (version, encoding)
+ cdef isstandalone(self):
+ if self._c_doc.standalone == -1:
+ return None
+ else:
+ return (self._c_doc.standalone == 1)
+
cdef buildNewPrefix(self):
if self._ns_counter < python.PyTuple_GET_SIZE(_PREFIX_CACHE):
ns = python.PyTuple_GET_ITEM(_PREFIX_CACHE, self._ns_counter)
@@ -441,6 +447,17 @@
xml_version, encoding = self._doc.getxmlinfo()
return encoding
+ property standalone:
+ u"""Returns the standalone flag as declared by the document. The possible
+ values are True (``standalone='yes'``), False
+ (``standalone='no'`` or flag not provided in the declaration),
+ and None (unknown or no declaration found). Note that a
+ normal truth test on this value will always tell if the
+ ``standalone`` flag was set to ``'yes'`` or not.
+ """
+ def __get__(self):
+ return self._doc.isstandalone()
+
property URL:
u"The source URL of the document (or None if unknown)."
def __get__(self):
@@ -1622,10 +1639,10 @@
def write(self, file, *, encoding=None, method=u"xml",
pretty_print=False, xml_declaration=None, with_tail=True,
- compression=0):
+ standalone=None, compression=0):
u"""write(self, file, encoding=None, method="xml",
pretty_print=False, xml_declaration=None, with_tail=True,
- compression=0)
+ standalone=None, compression=0)
Write the tree to a filename, file or file-like object.
@@ -1634,9 +1651,14 @@
The keyword argument 'method' selects the output method: 'xml' or
'html'.
+ Passing a boolean value to the ``standalone`` option will
+ output an XML declaration with the corresponding
+ ``standalone`` flag.
+
The ``compression`` option enables GZip compression level 1-9.
"""
cdef bint write_declaration
+ cdef int is_standalone
self._assertHasRoot()
# suppress decl. in default case (purely for ElementTree compatibility)
if xml_declaration is not None:
@@ -1650,10 +1672,19 @@
encoding = encoding.upper()
write_declaration = encoding not in \
(u'US-ASCII', u'ASCII', u'UTF8', u'UTF-8')
+ if standalone is None:
+ is_standalone = -1
+ elif standalone:
+ write_declaration = 1
+ is_standalone = 1
+ else:
+ write_declaration = 1
+ is_standalone = 0
if compression is None or compression < 0:
compression = 0
_tofilelike(file, self._context_node, encoding, method,
- write_declaration, 1, pretty_print, with_tail, compression)
+ write_declaration, 1, pretty_print, with_tail,
+ is_standalone, compression)
def getpath(self, _Element element not None):
u"""getpath(self, element)
@@ -2537,9 +2568,11 @@
_dumpToFile(sys.stdout, elem._c_node, pretty_print, with_tail)
def tostring(element_or_tree, *, encoding=None, method=u"xml",
- xml_declaration=None, pretty_print=False, with_tail=True):
+ xml_declaration=None, pretty_print=False, with_tail=True,
+ standalone=None):
u"""tostring(element_or_tree, encoding=None, method="xml",
- xml_declaration=None, pretty_print=False, with_tail=True)
+ xml_declaration=None, pretty_print=False, with_tail=True,
+ standalone=None)
Serialize an element to an encoded string representation of its XML
tree.
@@ -2557,11 +2590,15 @@
The keyword argument 'method' selects the output method: 'xml',
'html' or plain 'text'.
+ Passing a boolean value to the ``standalone`` option will output
+ an XML declaration with the corresponding ``standalone`` flag.
+
You can prevent the tail text of the element from being serialised
by passing the boolean ``with_tail`` option. This has no impact
on the tail text of children, which will always be serialised.
"""
cdef bint write_declaration
+ cdef int is_standalone
if encoding is _unicode:
if xml_declaration:
raise ValueError, \
@@ -2575,14 +2612,23 @@
write_declaration = xml_declaration
if encoding is None:
encoding = u'ASCII'
+ if standalone is None:
+ is_standalone = -1
+ elif standalone:
+ write_declaration = 1
+ is_standalone = 1
+ else:
+ write_declaration = 1
+ is_standalone = 0
if isinstance(element_or_tree, _Element):
return _tostring(<_Element>element_or_tree, encoding, method,
- write_declaration, 0, pretty_print, with_tail)
+ write_declaration, 0, pretty_print, with_tail,
+ is_standalone)
elif isinstance(element_or_tree, _ElementTree):
return _tostring((<_ElementTree>element_or_tree)._context_node,
encoding, method, write_declaration, 1, pretty_print,
- with_tail)
+ with_tail, is_standalone)
else:
raise TypeError, u"Type '%s' cannot be serialized." % \
python._fqtypename(element_or_tree)
@@ -2601,7 +2647,7 @@
def tounicode(element_or_tree, *, method=u"xml", pretty_print=False,
with_tail=True):
u"""tounicode(element_or_tree, method="xml", pretty_print=False,
- with_tail=True)
+ with_tail=True)
Serialize an element to the Python unicode representation of its XML
tree.
@@ -2623,10 +2669,11 @@
"""
if isinstance(element_or_tree, _Element):
return _tostring(<_Element>element_or_tree, _unicode, method,
- 0, 0, pretty_print, with_tail)
+ 0, 0, pretty_print, with_tail, -1)
elif isinstance(element_or_tree, _ElementTree):
return _tostring((<_ElementTree>element_or_tree)._context_node,
- _unicode, method, 0, 1, pretty_print, with_tail)
+ _unicode, method, 0, 1, pretty_print, with_tail,
+ -1)
else:
raise TypeError, u"Type '%s' cannot be serialized." % \
type(element_or_tree)
Modified: lxml/trunk/src/lxml/serializer.pxi
==============================================================================
--- lxml/trunk/src/lxml/serializer.pxi (original)
+++ lxml/trunk/src/lxml/serializer.pxi Sat Mar 21 16:01:01 2009
@@ -75,7 +75,7 @@
cdef _tostring(_Element element, encoding, method,
bint write_xml_declaration, bint write_complete_document,
- bint pretty_print, bint with_tail):
+ bint pretty_print, bint with_tail, int standalone):
u"""Serialize an element to an encoded string representation of its XML
tree.
"""
@@ -111,7 +111,7 @@
with nogil:
_writeNodeToBuffer(c_buffer, element._c_node, c_enc, c_method,
write_xml_declaration, write_complete_document,
- pretty_print, with_tail)
+ pretty_print, with_tail, standalone)
tree.xmlOutputBufferFlush(c_buffer)
if c_buffer.conv is not NULL:
c_result_buffer = c_buffer.conv
@@ -155,12 +155,13 @@
xmlNode* c_node, char* encoding, int c_method,
bint write_xml_declaration,
bint write_complete_document,
- bint pretty_print, bint with_tail) nogil:
+ bint pretty_print, bint with_tail,
+ int standalone) nogil:
cdef xmlDoc* c_doc
cdef xmlNode* c_nsdecl_node
c_doc = c_node.doc
if write_xml_declaration and c_method == OUTPUT_METHOD_XML:
- _writeDeclarationToBuffer(c_buffer, c_doc.version, encoding)
+ _writeDeclarationToBuffer(c_buffer, c_doc.version, encoding, standalone)
# write internal DTD subset, preceding PIs/comments, etc.
if write_complete_document:
@@ -203,14 +204,20 @@
tree.xmlOutputBufferWrite(c_buffer, 1, "\n")
cdef void _writeDeclarationToBuffer(tree.xmlOutputBuffer* c_buffer,
- char* version, char* encoding) nogil:
+ char* version, char* encoding,
+ int standalone) nogil:
if version is NULL:
version = "1.0"
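tree.xmlOutputBufferWrite(c_buffer, 15, "<?xml version='")
tree.xmlOutputBufferWriteString(c_buffer, version)
tree.xmlOutputBufferWrite(c_buffer, 12, "' encoding='")
tree.xmlOutputBufferWriteString(c_buffer, encoding)
- tree.xmlOutputBufferWrite(c_buffer, 4, "'?>\n")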
+ if standalone == 0:
+ tree.xmlOutputBufferWrite(c_buffer, 20, "' standalone='no'?>\n")
+ elif standalone == 1:
+ tree.xmlOutputBufferWrite(c_buffer, 21, "' standalone='yes'?>\n")
+ else:
+ tree.xmlOutputBufferWrite(c_buffer, 4, "'?>\n")
cdef void _writeDtdToBuffer(tree.xmlOutputBuffer* c_buffer,
xmlDoc* c_doc, char* c_root_name,
@@ -351,7 +358,8 @@
cdef _tofilelike(f, _Element element, encoding, method,
bint write_xml_declaration, bint write_doctype,
- bint pretty_print, bint with_tail, int compression):
+ bint pretty_print, bint with_tail, int standalone,
+ int compression):
cdef python.PyThreadState* state = NULL
cdef _FilelikeWriter writer
cdef tree.xmlOutputBuffer* c_buffer
@@ -408,7 +416,7 @@
_writeNodeToBuffer(c_buffer, element._c_node, c_enc, c_method,
write_xml_declaration, write_doctype,
- pretty_print, with_tail)
+ pretty_print, with_tail, standalone)
error_result = c_buffer.error
if error_result == xmlerror.XML_ERR_OK:
error_result = tree.xmlOutputBufferClose(c_buffer)
Modified: lxml/trunk/src/lxml/tests/test_etree.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_etree.py (original)
+++ lxml/trunk/src/lxml/tests/test_etree.py Sat Mar 21 16:01:01 2009
@@ -2313,6 +2313,78 @@
result = tostring(a, with_tail=True)
self.assertEquals(result, _bytes("bTAILaTAIL"))
+ def test_standalone(self):
+ tostring = self.etree.tostring
+ XML = self.etree.XML
+ ElementTree = self.etree.ElementTree
+ Element = self.etree.Element
+
+ tree = Element("root").getroottree()
+ self.assertEquals(None, tree.docinfo.standalone)
+
+ tree = XML(_bytes("")).getroottree()
+ self.assertEquals(None, tree.docinfo.standalone)
+
+ tree = XML(_bytes(
+ "\n"
+ )).getroottree()
+ self.assertEquals(True, tree.docinfo.standalone)
+
+ tree = XML(_bytes(
+ "\n"
+ )).getroottree()
+ self.assertEquals(False, tree.docinfo.standalone)
+
+ def test_tostring_standalone(self):
+ tostring = self.etree.tostring
+ XML = self.etree.XML
+ ElementTree = self.etree.ElementTree
+
+ root = XML(_bytes(""))
+
+ tree = ElementTree(root)
+ self.assertEquals(None, tree.docinfo.standalone)
+
+ result = tostring(root, xml_declaration=True, encoding="ASCII")
+ self.assertEquals(result, _bytes(
+ "\n"))
+
+ result = tostring(root, xml_declaration=True, encoding="ASCII",
+ standalone=True)
+ self.assertEquals(result, _bytes(
+ "\n"))
+
+ tree = ElementTree(XML(result))
+ self.assertEquals(True, tree.docinfo.standalone)
+
+ result = tostring(root, xml_declaration=True, encoding="ASCII",
+ standalone=False)
+ self.assertEquals(result, _bytes(
+ "\n"))
+
+ tree = ElementTree(XML(result))
+ self.assertEquals(False, tree.docinfo.standalone)
+
+ def test_tostring_standalone_in_out(self):
+ tostring = self.etree.tostring
+ XML = self.etree.XML
+ ElementTree = self.etree.ElementTree
+
+ root = XML(_bytes(
+ "\n"))
+
+ tree = ElementTree(root)
+ self.assertEquals(True, tree.docinfo.standalone)
+
+ result = tostring(root, xml_declaration=True, encoding="ASCII")
+ self.assertEquals(result, _bytes(
+ "\n"))
+
+ result = tostring(root, xml_declaration=True, encoding="ASCII",
+ standalone=True)
+ self.assertEquals(result, _bytes(
+ "\n"))
+
def test_tostring_method_text_encoding(self):
tostring = self.etree.tostring
Element = self.etree.Element
Modified: lxml/trunk/src/lxml/tree.pxd
==============================================================================
--- lxml/trunk/src/lxml/tree.pxd (original)
+++ lxml/trunk/src/lxml/tree.pxd Sat Mar 21 16:01:01 2009
@@ -131,6 +131,7 @@
xmlDoc* doc
xmlDict* dict
xmlHashTable* ids
+ int standalone
char* version
char* encoding
char* URL
From scoder at codespeak.net Sat Mar 21 16:01:08 2009
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Sat, 21 Mar 2009 16:01:08 +0100 (CET)
Subject: [Lxml-checkins] r63186 - in lxml/trunk: . doc
Message-ID: <20090321150108.261A91684C0@codespeak.net>
Author: scoder
Date: Sat Mar 21 16:01:06 2009
New Revision: 63186
Modified:
lxml/trunk/ (props changed)
lxml/trunk/CHANGES.txt
lxml/trunk/doc/main.txt
Log:
r5094 at delle: sbehnel | 2009-03-21 15:58:20 +0100
prepare release of lxml 2.2
Modified: lxml/trunk/CHANGES.txt
==============================================================================
--- lxml/trunk/CHANGES.txt (original)
+++ lxml/trunk/CHANGES.txt Sat Mar 21 16:01:06 2009
@@ -2,8 +2,8 @@
lxml changelog
==============
-2.2 (?)
-=======
+2.2 (2009-03-21)
+================
Features added
--------------
Modified: lxml/trunk/doc/main.txt
==============================================================================
--- lxml/trunk/doc/main.txt (original)
+++ lxml/trunk/doc/main.txt Sat Mar 21 16:01:06 2009
@@ -147,8 +147,8 @@
source release. If you can't wait, consider trying a less recent
release version first.
-The latest version is `lxml 2.2beta4`_, released 2009-02-17
-(`changes for 2.2beta4`_). `Older versions`_ are listed below.
+The latest version is `lxml 2.2`_, released 2009-03-21
+(`changes for 2.2`_). `Older versions`_ are listed below.
Please take a look at the `installation instructions`_!
From scoder at codespeak.net Sat Mar 21 21:20:03 2009
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Sat, 21 Mar 2009 21:20:03 +0100 (CET)
Subject: [Lxml-checkins] r63196 - lxml/tag/lxml-2.2
Message-ID: <20090321202003.DE810168491@codespeak.net>
Author: scoder
Date: Sat Mar 21 21:20:00 2009
New Revision: 63196
Added:
lxml/tag/lxml-2.2/
- copied from r63195, lxml/trunk/
Log:
tag for lxml 2.2
From scoder at codespeak.net Sat Mar 21 21:25:45 2009
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Sat, 21 Mar 2009 21:25:45 +0100 (CET)
Subject: [Lxml-checkins] r63198 - lxml/trunk
Message-ID: <20090321202545.17C0C16848F@codespeak.net>
Author: scoder
Date: Sat Mar 21 21:25:44 2009
New Revision: 63198
Modified:
lxml/trunk/ (props changed)
lxml/trunk/setup.py
Log:
r5099 at delle: sbehnel | 2009-03-21 21:16:22 +0100
trove identifier: written in Cython
Modified: lxml/trunk/setup.py
==============================================================================
--- lxml/trunk/setup.py (original)
+++ lxml/trunk/setup.py Sat Mar 21 21:25:44 2009
@@ -93,6 +93,7 @@
'Intended Audience :: Developers',
'Intended Audience :: Information Technology',
'License :: OSI Approved :: BSD License',
+ 'Programming Language :: Cython',
'Programming Language :: Python :: 2',
'Programming Language :: Python :: 2.3',
'Programming Language :: Python :: 2.4',
From scoder at codespeak.net Fri Mar 27 20:22:29 2009
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Fri, 27 Mar 2009 20:22:29 +0100 (CET)
Subject: [Lxml-checkins] r63394 - in lxml/trunk: . src/lxml
Message-ID: <20090327192229.0CD3716853A@codespeak.net>
Author: scoder
Date: Fri Mar 27 20:22:26 2009
New Revision: 63394
Modified:
lxml/trunk/ (props changed)
lxml/trunk/src/lxml/parser.pxi
Log:
r5102 at delle: sbehnel | 2009-03-23 09:04:23 +0100
uglify long docstring to make MSVC happy
Modified: lxml/trunk/src/lxml/parser.pxi
==============================================================================
--- lxml/trunk/src/lxml/parser.pxi (original)
+++ lxml/trunk/src/lxml/parser.pxi Fri Mar 27 20:22:26 2009
@@ -1164,12 +1164,7 @@
)
cdef class XMLParser(_FeedParser):
- u"""XMLParser(self, encoding=None, attribute_defaults=False, \
- dtd_validation=False, load_dtd=False, no_network=True, \
- ns_clean=False, recover=False, XMLSchema schema=None, \
- remove_blank_text=False, resolve_entities=True, \
- remove_comments=False, remove_pis=False, strip_cdata=True, \
- target=None, compact=True)
+ u"""XMLParser(self, encoding=None, attribute_defaults=False, dtd_validation=False, load_dtd=False, no_network=True, ns_clean=False, recover=False, XMLSchema schema=None, remove_blank_text=False, resolve_entities=True, remove_comments=False, remove_pis=False, strip_cdata=True, target=None, compact=True)
The XML parser.
From scoder at codespeak.net Fri Mar 27 20:22:34 2009
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Fri, 27 Mar 2009 20:22:34 +0100 (CET)
Subject: [Lxml-checkins] r63395 - lxml/trunk
Message-ID: <20090327192234.66C24168552@codespeak.net>
Author: scoder
Date: Fri Mar 27 20:22:33 2009
New Revision: 63395
Modified:
lxml/trunk/ (props changed)
lxml/trunk/setupinfo.py
Log:
r5103 at delle: sbehnel | 2009-03-23 09:05:25 +0100
name fix
Modified: lxml/trunk/setupinfo.py
==============================================================================
--- lxml/trunk/setupinfo.py (original)
+++ lxml/trunk/setupinfo.py Fri Mar 27 20:22:33 2009
@@ -30,7 +30,7 @@
value = os.getenv(name)
if value:
value = decode_input(value)
- if os.platform == 'win32' and ';' in value:
+ if sys.platform == 'win32' and ';' in value:
return value.split(';')
else:
return value.split()
From scoder at codespeak.net Fri Mar 27 20:22:39 2009
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Fri, 27 Mar 2009 20:22:39 +0100 (CET)
Subject: [Lxml-checkins] r63396 - lxml/trunk
Message-ID: <20090327192239.A078E16852E@codespeak.net>
Author: scoder
Date: Fri Mar 27 20:22:38 2009
New Revision: 63396
Modified:
lxml/trunk/ (props changed)
lxml/trunk/setup.py
Log:
r5104 at delle: sbehnel | 2009-03-27 16:02:40 +0100
URL fix
Modified: lxml/trunk/setup.py
==============================================================================
--- lxml/trunk/setup.py (original)
+++ lxml/trunk/setup.py Fri Mar 27 20:22:38 2009
@@ -65,7 +65,7 @@
maintainer="lxml dev team",
maintainer_email="lxml-dev at codespeak.net",
url="http://codespeak.net/lxml",
- download_url="http://cheeseshop.python.org/packages/source/l/lxml/lxml-%s.tar.gz" % versioninfo.version(),
+ download_url="http://pypi.python.org/packages/source/l/lxml/lxml-%s.tar.gz" % versioninfo.version(),
description="Powerful and Pythonic XML processing library combining libxml2/libxslt with the ElementTree API.",
From scoder at codespeak.net Fri Mar 27 20:22:44 2009
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Fri, 27 Mar 2009 20:22:44 +0100 (CET)
Subject: [Lxml-checkins] r63397 - in lxml/trunk: . src/lxml
Message-ID: <20090327192244.76E4F168557@codespeak.net>
Author: scoder
Date: Fri Mar 27 20:22:43 2009
New Revision: 63397
Modified:
lxml/trunk/ (props changed)
lxml/trunk/src/lxml/etree_defs.h
Log:
r5105 at delle: sbehnel | 2009-03-27 16:04:36 +0100
better error message when lib*-devel packages are missing
Modified: lxml/trunk/src/lxml/etree_defs.h
==============================================================================
--- lxml/trunk/src/lxml/etree_defs.h (original)
+++ lxml/trunk/src/lxml/etree_defs.h Fri Mar 27 20:22:43 2009
@@ -1,6 +1,21 @@
#ifndef HAS_ETREE_DEFS_H
#define HAS_ETREE_DEFS_H
+/* quick check for Python/libxml2/libxslt devel setup */
+#include "Python.h"
+#ifndef PY_VERSION_HEX
+# error the development package of Python (header files etc.) is not installed correctly
+#endif
+#include "libxml/xmlversion.h"
+#ifndef LIBXML_VERSION
+# error the development package of libxml2 (header files etc.) is not installed correctly
+#endif
+#include "libxslt/xsltconfig.h"
+#ifndef LIBXSLT_VERSION
+# error the development package of libxslt (header files etc.) is not installed correctly
+#endif
+
+
/* v_arg functions */
#define va_int(ap) va_arg(ap, int)
#define va_charptr(ap) va_arg(ap, char *)
@@ -51,7 +66,6 @@
#endif
/* libxml2 version specific setup */
-#include "libxml/xmlversion.h"
#if LIBXML_VERSION < 20621
/* (X|HT)ML_PARSE_COMPACT were added in libxml2 2.6.21 */
# define XML_PARSE_COMPACT 0
From scoder at codespeak.net Fri Mar 27 20:22:49 2009
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Fri, 27 Mar 2009 20:22:49 +0100 (CET)
Subject: [Lxml-checkins] r63398 - in lxml/trunk: . doc
Message-ID: <20090327192249.4333B168552@codespeak.net>
Author: scoder
Date: Fri Mar 27 20:22:48 2009
New Revision: 63398
Modified:
lxml/trunk/ (props changed)
lxml/trunk/doc/performance.txt
Log:
r5106 at delle: sbehnel | 2009-03-27 16:05:45 +0100
doc update
Modified: lxml/trunk/doc/performance.txt
==============================================================================
--- lxml/trunk/doc/performance.txt (original)
+++ lxml/trunk/doc/performance.txt Fri Mar 27 20:22:48 2009
@@ -256,12 +256,13 @@
The ElementTree API
===================
-Since all three libraries implement the same API, their performance is easy to
-compare in this area. A major disadvantage for lxml's performance is the
-different tree model that underlies libxml2. It allows lxml to provide parent
-pointers for elements, but also increases the overhead of tree building and
-restructuring. This can be seen from the tree setup times of the benchmark
-(given in seconds)::
+Since all three libraries implement the same API, their performance is
+easy to compare in this area. A major disadvantage for lxml's
+performance is the different tree model that underlies libxml2. It
+allows lxml to provide parent pointers for elements and full XPath
+support, but also increases the overhead of tree building and
+restructuring. This can be seen from the tree setup times of the
+benchmark (given in seconds)::
lxe: -- S- U- -A SA UA
T1: 0.0502 0.0572 0.0613 0.0494 0.0575 0.0615
@@ -281,10 +282,10 @@
While lxml is still a lot faster than ET in most cases, cET can be up
to five times faster than lxml here. One of the reasons is that lxml
-must additionally discard the created Python elements after their use,
-when they are no longer referenced. ET and cET represent the tree
-itself through these objects, which reduces the overhead in creating
-them.
+must encode incoming string data and tag names into UTF-8, and
+additionally discard the created Python elements after their use, when
+they are no longer referenced. ET and cET represent the tree itself
+through these objects, which reduces the overhead in creating them.
Child access
From scoder at codespeak.net Fri Mar 27 22:09:35 2009
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Fri, 27 Mar 2009 22:09:35 +0100 (CET)
Subject: [Lxml-checkins] r63399 - in lxml/trunk: . doc src/lxml
Message-ID: <20090327210935.39618168557@codespeak.net>
Author: scoder
Date: Fri Mar 27 22:09:33 2009
New Revision: 63399
Modified:
lxml/trunk/ (props changed)
lxml/trunk/CHANGES.txt
lxml/trunk/doc/parsing.txt
lxml/trunk/src/lxml/etree_defs.h
lxml/trunk/src/lxml/parser.pxi
lxml/trunk/src/lxml/xmlparser.pxd
Log:
r5112 at delle: sbehnel | 2009-03-27 22:06:09 +0100
support 'huge_tree' parser option to disable security restrictions in libxml2 2.7
Modified: lxml/trunk/CHANGES.txt
==============================================================================
--- lxml/trunk/CHANGES.txt (original)
+++ lxml/trunk/CHANGES.txt Fri Mar 27 22:09:33 2009
@@ -2,6 +2,22 @@
lxml changelog
==============
+Under development
+=================
+
+Features added
+--------------
+
+* Pass ``huge_tree`` parser option to disable parser security
+ restrictions imposed by libxml2 2.7.
+
+Bugs fixed
+----------
+
+Other changes
+-------------
+
+
2.2 (2009-03-21)
================
Modified: lxml/trunk/doc/parsing.txt
==============================================================================
--- lxml/trunk/doc/parsing.txt (original)
+++ lxml/trunk/doc/parsing.txt Fri Mar 27 22:09:33 2009
@@ -134,7 +134,8 @@
* load_dtd - load and parse the DTD while parsing (no validation is performed)
-* no_network - prevent network access when looking up external documents
+* no_network - prevent network access when looking up external
+ documents (on by default)
* ns_clean - try to clean up redundant namespace declarations
@@ -144,6 +145,17 @@
* remove_comments - discard comments
+* remove_pis - discard processing instructions
+
+* strip_cdata - replace CDATA sections by normal text content (on by
+ default)
+
+* resolve_entities - replace entities by their text value (on by
+ default)
+
+* huge_tree - disable security restrictions and support very deep trees
+ and very long text content (only affects libxml2 2.7+)
+
* compact - use compact storage for short text content (on by default)
Modified: lxml/trunk/src/lxml/etree_defs.h
==============================================================================
--- lxml/trunk/src/lxml/etree_defs.h (original)
+++ lxml/trunk/src/lxml/etree_defs.h Fri Mar 27 22:09:33 2009
@@ -75,6 +75,13 @@
# define HTML_PARSE_RECOVER XML_PARSE_RECOVER
#endif
+#if LIBXML_VERSION < 20700
+/* These were added in libxml2 2.7.0 */
+# define XML_PARSE_OLD10 0
+# define XML_PARSE_NOBASEFIX 0
+# define XML_PARSE_HUGE 0
+#endif
+
/* added to xmlsave API in libxml2 2.6.23 */
#if LIBXML_VERSION < 20623
# define xmlSaveToBuffer(buffer, encoding, options)
Modified: lxml/trunk/src/lxml/parser.pxi
==============================================================================
--- lxml/trunk/src/lxml/parser.pxi (original)
+++ lxml/trunk/src/lxml/parser.pxi Fri Mar 27 22:09:33 2009
@@ -1192,6 +1192,8 @@
- strip_cdata - replace CDATA sections by normal text content (default: True)
- compact - save memory for short text content (default: True)
- resolve_entities - replace entities by their text value (default: True)
+ - huge_tree - disable security restrictions and support very deep trees
+ and very long text content (only affects libxml2 2.7+)
Other keyword arguments:
@@ -1206,7 +1208,7 @@
def __init__(self, *, encoding=None, attribute_defaults=False,
dtd_validation=False, load_dtd=False, no_network=True,
ns_clean=False, recover=False, XMLSchema schema=None,
- remove_blank_text=False, resolve_entities=True,
+ huge_tree=False, remove_blank_text=False, resolve_entities=True,
remove_comments=False, remove_pis=False, strip_cdata=True,
target=None, compact=True):
cdef int parse_options
@@ -1225,6 +1227,8 @@
parse_options = parse_options | xmlparser.XML_PARSE_RECOVER
if remove_blank_text:
parse_options = parse_options | xmlparser.XML_PARSE_NOBLANKS
+ if huge_tree:
+ parse_options = parse_options | xmlparser.XML_PARSE_HUGE
if not no_network:
parse_options = parse_options ^ xmlparser.XML_PARSE_NONET
if not compact:
@@ -1256,7 +1260,7 @@
def __init__(self, *, encoding=None, attribute_defaults=False,
dtd_validation=False, load_dtd=False, no_network=True,
ns_clean=False, recover=False, schema=None,
- remove_blank_text=False, resolve_entities=True,
+ huge_tree=False, remove_blank_text=False, resolve_entities=True,
remove_comments=True, remove_pis=True, strip_cdata=True,
target=None, compact=True):
XMLParser.__init__(self,
@@ -1267,6 +1271,7 @@
ns_clean=ns_clean,
recover=recover,
remove_blank_text=remove_blank_text,
+ huge_tree=huge_tree,
compact=compact,
resolve_entities=resolve_entities,
remove_comments=remove_comments,
Modified: lxml/trunk/src/lxml/xmlparser.pxd
==============================================================================
--- lxml/trunk/src/lxml/xmlparser.pxd (original)
+++ lxml/trunk/src/lxml/xmlparser.pxd Fri Mar 27 22:09:33 2009
@@ -126,6 +126,10 @@
XML_PARSE_NOXINCNODE = 32768 # do not generate XINCLUDE START/END nodes
# libxml2 2.6.21+ only:
XML_PARSE_COMPACT = 65536 # compact small text nodes
+ # libxml2 2.7.0+ only:
+ XML_PARSE_OLD10 = 131072 # parse using XML-1.0 before update 5
+ XML_PARSE_NOBASEFIX = 262144 # do not fixup XINCLUDE xml:base uris
+ XML_PARSE_HUGE = 524288 # relax any hardcoded limit from the parser
cdef void xmlInitParser() nogil
cdef void xmlCleanupParser() nogil
From scoder at codespeak.net Fri Mar 27 22:19:12 2009
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Fri, 27 Mar 2009 22:19:12 +0100 (CET)
Subject: [Lxml-checkins] r63400 - in lxml/trunk: . src/lxml
Message-ID: <20090327211912.9B6FA16852E@codespeak.net>
Author: scoder
Date: Fri Mar 27 22:19:09 2009
New Revision: 63400
Modified:
lxml/trunk/ (props changed)
lxml/trunk/src/lxml/etree_defs.h
Log:
r5114 at delle: sbehnel | 2009-03-27 22:16:42 +0100
enable support for newer parser options even when compiled against an older libxml2 version
Modified: lxml/trunk/src/lxml/etree_defs.h
==============================================================================
--- lxml/trunk/src/lxml/etree_defs.h (original)
+++ lxml/trunk/src/lxml/etree_defs.h Fri Mar 27 22:19:09 2009
@@ -68,8 +68,8 @@
/* libxml2 version specific setup */
#if LIBXML_VERSION < 20621
/* (X|HT)ML_PARSE_COMPACT were added in libxml2 2.6.21 */
-# define XML_PARSE_COMPACT 0
-# define HTML_PARSE_COMPACT 0
+# define XML_PARSE_COMPACT 1 << 16
+# define HTML_PARSE_COMPACT XML_PARSE_COMPACT
/* HTML_PARSE_RECOVER was added in libxml2 2.6.21 */
# define HTML_PARSE_RECOVER XML_PARSE_RECOVER
@@ -77,9 +77,9 @@
#if LIBXML_VERSION < 20700
/* These were added in libxml2 2.7.0 */
-# define XML_PARSE_OLD10 0
-# define XML_PARSE_NOBASEFIX 0
-# define XML_PARSE_HUGE 0
+# define XML_PARSE_OLD10 1 << 17
+# define XML_PARSE_NOBASEFIX 1 << 18
+# define XML_PARSE_HUGE 1 << 19
#endif
/* added to xmlsave API in libxml2 2.6.23 */