From scoder at codespeak.net Mon May 1 16:39:21 2006
From: scoder at codespeak.net (scoder@codespeak.net)
Date: Mon May 1 16:39:23 2006
Subject: [Lxml-checkins] r26635 - lxml/trunk/src/lxml
Message-ID: <20060501143921.DA068100AD@code0.codespeak.net>
Author: scoder
Date: Mon May 1 16:39:21 2006
New Revision: 26635
Modified:
lxml/trunk/src/lxml/xslt.pxi
Log:
doc typo
Modified: lxml/trunk/src/lxml/xslt.pxi
==============================================================================
--- lxml/trunk/src/lxml/xslt.pxi (original)
+++ lxml/trunk/src/lxml/xslt.pxi Mon May 1 16:39:21 2006
@@ -336,7 +336,7 @@
result._xslt = xslt
return result
-# do not register all libxslt extra function, provide only "node-set"
+# do not register all libxslt extra functions, provide only "node-set"
# functions like "output" and "write" are a potential security risk
#xslt.xsltRegisterAllExtras()
xslt.xsltRegisterExtModuleFunction("node-set",
From scoder at codespeak.net Mon May 1 17:08:53 2006
From: scoder at codespeak.net (scoder@codespeak.net)
Date: Mon May 1 17:08:54 2006
Subject: [Lxml-checkins] r26636 - in lxml/trunk: . src/lxml src/lxml/tests
Message-ID: <20060501150853.5D92D100AD@code0.codespeak.net>
Author: scoder
Date: Mon May 1 17:08:50 2006
New Revision: 26636
Modified:
lxml/trunk/CHANGES.txt
lxml/trunk/src/lxml/etree.pyx
lxml/trunk/src/lxml/tests/test_etree.py
lxml/trunk/src/lxml/tree.pxd
Log:
doctype property for _ElementTree, returns (public ID, system URL) tuple based on libxml2 parsed DTD information
Modified: lxml/trunk/CHANGES.txt
==============================================================================
--- lxml/trunk/CHANGES.txt (original)
+++ lxml/trunk/CHANGES.txt Mon May 1 17:08:50 2006
@@ -7,6 +7,9 @@
Features added
--------------
+* Read-only 'doctype' attribute in ElementTree class that holds a tuple
+ (public ID, system URL) as seen by the parser
+
* etree module can be compiled without libxslt by commenting out the line
'include "xslt.pxi"' at the end of the etree.pyx source file
Modified: lxml/trunk/src/lxml/etree.pyx
==============================================================================
--- lxml/trunk/src/lxml/etree.pyx (original)
+++ lxml/trunk/src/lxml/etree.pyx Mon May 1 17:08:50 2006
@@ -129,6 +129,24 @@
return None
return _elementFactory(self, c_node)
+ cdef getdoctype(self):
+ cdef tree.xmlDtd* dtd
+ public_id = None
+ sys_url = None
+ dtd = self._c_doc.intSubset
+ if dtd is not NULL:
+ if dtd.ExternalID is not NULL:
+ public_id = funicode(dtd.ExternalID)
+ if dtd.SystemID is not NULL:
+ sys_url = funicode(dtd.SystemID)
+ dtd = self._c_doc.extSubset
+ if dtd is not NULL:
+ if not public_id and dtd.ExternalID is not NULL:
+ public_id = funicode(dtd.ExternalID)
+ if not sys_url and dtd.SystemID is not NULL:
+ sys_url = funicode(dtd.SystemID)
+ return (public_id, sys_url)
+
cdef buildNewPrefix(self):
ns = python.PyString_FromFormat("ns%d", self._ns_counter)
self._ns_counter = self._ns_counter + 1
@@ -233,7 +251,16 @@
def getroot(self):
return self._context_node
-
+
+ property doctype:
+ """A tuple (public ID, system URL) of the DOCTYPE seen by the parser.
+ Any of the two may be None. This value is only defined for
+ ElementTree objects based on the root node of a parsed document (e.g.
+ those returned by the parse functions).
+ """
+ def __get__(self):
+ return self._doc.getdoctype()
+
def write(self, file, encoding='us-ascii'):
if not hasattr(file, 'write'):
# file is a filename, we want a file object
Modified: lxml/trunk/src/lxml/tests/test_etree.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_etree.py (original)
+++ lxml/trunk/src/lxml/tests/test_etree.py Mon May 1 17:08:50 2006
@@ -379,7 +379,36 @@
8, e.index(e[8], -12, -1))
self.assertEquals(
0, e.index(e[0], -12, -1))
-
+
+ def test_doctype_public(self):
+ etree = self.etree
+ pub_id = "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ sys_id = "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"
+ xml = '''\
+
+
+''' % (pub_id, sys_id)
+
+ tree = etree.parse(StringIO(xml))
+ self.assertEquals(tree.doctype, (pub_id, sys_id))
+
+ def test_doctype_system(self):
+ etree = self.etree
+ sys_id = "some.dtd"
+ xml = '''\
+
+
+''' % sys_id
+
+ tree = etree.parse(StringIO(xml))
+ self.assertEquals(tree.doctype, (None, sys_id))
+
+ def test_doctype_empty(self):
+ etree = self.etree
+ xml = ''
+ tree = etree.parse(StringIO(xml))
+ self.assertEquals(tree.doctype, (None, None))
+
def _writeElement(self, element, encoding='us-ascii'):
"""Write out element for comparison.
"""
Modified: lxml/trunk/src/lxml/tree.pxd
==============================================================================
--- lxml/trunk/src/lxml/tree.pxd (original)
+++ lxml/trunk/src/lxml/tree.pxd Mon May 1 17:08:50 2006
@@ -65,7 +65,11 @@
char* content
xmlAttr* properties
xmlNs* ns
-
+
+ ctypedef struct xmlDtd:
+ char* ExternalID
+ char* SystemID
+
ctypedef struct xmlDoc:
xmlElementType type
char* name
@@ -79,6 +83,8 @@
xmlHashTable* ids
char* URL
void* _private
+ xmlDtd* intSubset
+ xmlDtd* extSubset
ctypedef struct xmlAttr:
void* _private
From scoder at codespeak.net Tue May 2 07:46:18 2006
From: scoder at codespeak.net (scoder@codespeak.net)
Date: Tue May 2 07:46:19 2006
Subject: [Lxml-checkins] r26645 - in lxml/trunk: . src/lxml
Message-ID: <20060502054618.2116010091@code0.codespeak.net>
Author: scoder
Date: Tue May 2 07:46:16 2006
New Revision: 26645
Modified:
lxml/trunk/src/lxml/parser.pxi
lxml/trunk/src/lxml/xmlparser.pxd
lxml/trunk/version.txt
Log:
do not destroy mal-formed parser results if the recover option is set
Modified: lxml/trunk/src/lxml/parser.pxi
==============================================================================
--- lxml/trunk/src/lxml/parser.pxi (original)
+++ lxml/trunk/src/lxml/parser.pxi Tue May 2 07:46:16 2006
@@ -149,7 +149,7 @@
cdef xmlDoc* _handleParseResult(xmlParserCtxt* ctxt, xmlDoc* result,
char* c_filename) except NULL:
cdef _ResolverContext context
- if ctxt.wellFormed:
+ if ctxt.wellFormed or (ctxt.options & xmlparser.XML_PARSE_RECOVER):
__GLOBAL_PARSER_CONTEXT._initDocDict(result)
elif result is not NULL:
# free broken document
@@ -158,7 +158,11 @@
if ctxt._private is not NULL:
context = <_ResolverContext>ctxt._private
- context._raise_if_stored()
+ if context._has_raised():
+ if result is not NULL:
+ tree.xmlFreeDoc(result)
+ result = NULL
+ context._raise_if_stored()
if result is NULL:
if c_filename is not NULL and \
Modified: lxml/trunk/src/lxml/xmlparser.pxd
==============================================================================
--- lxml/trunk/src/lxml/xmlparser.pxd (original)
+++ lxml/trunk/src/lxml/xmlparser.pxd Tue May 2 07:46:16 2006
@@ -15,6 +15,7 @@
xmlDict* dict
void* _private
int wellFormed
+ int options
xmlError lastError
ctypedef enum xmlParserOption:
Modified: lxml/trunk/version.txt
==============================================================================
--- lxml/trunk/version.txt (original)
+++ lxml/trunk/version.txt Tue May 2 07:46:16 2006
@@ -1 +1 @@
-0.9.1
+0.9.2
From scoder at codespeak.net Tue May 2 07:47:47 2006
From: scoder at codespeak.net (scoder@codespeak.net)
Date: Tue May 2 07:47:49 2006
Subject: [Lxml-checkins] r26646 - lxml/trunk
Message-ID: <20060502054747.D4DB510091@code0.codespeak.net>
Author: scoder
Date: Tue May 2 07:47:46 2006
New Revision: 26646
Modified:
lxml/trunk/version.txt
Log:
oops, one file slipped through
Modified: lxml/trunk/version.txt
==============================================================================
--- lxml/trunk/version.txt (original)
+++ lxml/trunk/version.txt Tue May 2 07:47:46 2006
@@ -1 +1 @@
-0.9.2
+0.9.1
From scoder at codespeak.net Tue May 2 07:56:31 2006
From: scoder at codespeak.net (scoder@codespeak.net)
Date: Tue May 2 07:56:33 2006
Subject: [Lxml-checkins] r26647 - lxml/trunk/doc
Message-ID: <20060502055631.E09E010091@code0.codespeak.net>
Author: scoder
Date: Tue May 2 07:56:31 2006
New Revision: 26647
Modified:
lxml/trunk/doc/api.txt
Log:
forgot to merge doctests from htmlparser branch
Modified: lxml/trunk/doc/api.txt
==============================================================================
--- lxml/trunk/doc/api.txt (original)
+++ lxml/trunk/doc/api.txt Tue May 2 07:56:31 2006
@@ -19,13 +19,14 @@
>>> from StringIO import StringIO
-XMLParser
----------
+Parsers
+-------
-One of the differences is the parser. It is based on libxml2 and therefore
-only supports options that are backed by the library. Parsers take a number
-of keyword arguments. The following is an example for namespace cleanup
-during parsing, first with the default parser, then with a parametrized one::
+One of the differences is the parser. There is support for both XML and
+(broken) HTML. Both are based on libxml2 and therefore only support options
+that are backed by the library. Parsers take a number of keyword arguments.
+The following is an example for namespace cleanup during parsing, first with
+the default parser, then with a parametrized one::
>>> xml = ' '
@@ -38,6 +39,23 @@
>>> print lxml.etree.tostring(et.getroot())
+HTML parsing is similarly simple::
+
+ >>> broken_html = "test page title"
+
+ >>> parser = lxml.etree.HTMLParser()
+ >>> et = lxml.etree.parse(StringIO(broken_html), parser)
+
+ >>> print lxml.etree.tostring(et.getroot())
+ test page title
+
+Lxml has an HTML function, similar to the XML shortcut known from
+ElementTree::
+
+ >>> html = lxml.etree.HTML(broken_html)
+ >>> print lxml.etree.tostring(html)
+ test page title
+
Error handling on exceptions
----------------------------
From scoder at codespeak.net Tue May 2 08:04:43 2006
From: scoder at codespeak.net (scoder@codespeak.net)
Date: Tue May 2 08:04:46 2006
Subject: [Lxml-checkins] r26648 - lxml/trunk/doc
Message-ID: <20060502060443.6527B10091@code0.codespeak.net>
Author: scoder
Date: Tue May 2 08:04:40 2006
New Revision: 26648
Modified:
lxml/trunk/doc/api.txt
Log:
mention recover option for parsers in api.txt
Modified: lxml/trunk/doc/api.txt
==============================================================================
--- lxml/trunk/doc/api.txt (original)
+++ lxml/trunk/doc/api.txt Tue May 2 08:04:40 2006
@@ -39,9 +39,11 @@
>>> print lxml.etree.tostring(et.getroot())
-HTML parsing is similarly simple::
+HTML parsing is similarly simple. The parsers have a ``recover`` keyword
+argument that the HTMLParser sets by default. It lets libxml2 try its best to
+return something usable without raising an exception::
- >>> broken_html = "test page title"
+ >>> broken_html = "test page title"
>>> parser = lxml.etree.HTMLParser()
>>> et = lxml.etree.parse(StringIO(broken_html), parser)
From scoder at codespeak.net Tue May 2 08:04:58 2006
From: scoder at codespeak.net (scoder@codespeak.net)
Date: Tue May 2 08:04:59 2006
Subject: [Lxml-checkins] r26649 - lxml/trunk/doc
Message-ID: <20060502060458.2389E10091@code0.codespeak.net>
Author: scoder
Date: Tue May 2 08:04:56 2006
New Revision: 26649
Modified:
lxml/trunk/doc/resolvers.txt
Log:
ReST updates
Modified: lxml/trunk/doc/resolvers.txt
==============================================================================
--- lxml/trunk/doc/resolvers.txt (original)
+++ lxml/trunk/doc/resolvers.txt Tue May 2 08:04:56 2006
@@ -14,21 +14,21 @@
... '' % url, context)
This defines a resolver that always returns a dynamically generated DTD
-fragment defining an entity. The 'url' argument passes the system URL of the
-requested document, the 'id' argument is the public ID. Note that any of
-these may be None. The context object is not normally used by client code.
+fragment defining an entity. The ``url`` argument passes the system URL of
+the requested document, the ``id`` argument is the public ID. Note that any
+of these may be None. The context object is not normally used by client code.
Resolving is based on three methods of the Resolver object that build internal
representations of the result document. The following methods exist:
-* 'resolve_string' takes a parsable string as result document
-* 'resolve_filename' takes a filename
-* 'resolve_file' takes an open file-like object that has at least a read() method
-* 'resolve_empty' resolves into an empty document
+* ``resolve_string`` takes a parsable string as result document
+* ``resolve_filename`` takes a filename
+* ``resolve_file`` takes an open file-like object that has at least a read() method
+* ``resolve_empty`` resolves into an empty document
-The 'resolve' method may choose to return None, in which case the next
+The ``resolve`` method may choose to return None, in which case the next
registered resolver (or the default resolver) is consulted. It is never
-called if the resolver returns the result of any of the above 'resolve_*'
+called if the resolver returns the result of any of the above ``resolve_*``
methods.
Resolvers are registered local to a parser::
@@ -90,11 +90,11 @@
...
... """
-Note that it needs to resolve two URIs: 'honk:test' when compiling the XSLT
-document (i.e. when resolving xsl:import and xsl:include elements) and
-'hoi:test' at transformation time, when calls to the 'document' function are
-resolved. If we now register different resolvers with two different parsers,
-we can parse our document twice in different resolver contexts::
+Note that it needs to resolve two URIs: ``honk:test`` when compiling the XSLT
+document (i.e. when resolving ``xsl:import`` and ``xsl:include`` elements) and
+``hoi:test`` at transformation time, when calls to the ``document`` function
+are resolved. If we now register different resolvers with two different
+parsers, we can parse our document twice in different resolver contexts::
>>> hoi_parser = etree.XMLParser()
>>> normal_doc = etree.parse(StringIO(xml_text), hoi_parser)
@@ -109,7 +109,8 @@
These contexts are important for the further behaviour of the documents. They
memorise their original parser so that the correct set of resolvers is used in
subsequent lookups. To compile the stylesheet, XSLT must resolve the
-honk:test URI in the xsl:include element. The "hoi" resolver cannot do that::
+``honk:test`` URI in the ``xsl:include`` element. The ``hoi`` resolver cannot
+do that::
>>> transform = etree.XSLT(normal_doc)
Traceback (most recent call last):
@@ -121,15 +122,15 @@
[...]
XSLTParseError: Cannot resolve URI honk:test
-However, if we use the "honk" resolver associated with the respective
+However, if we use the ``honk`` resolver associated with the respective
document, everything works fine::
>>> transform = etree.XSLT(honk_doc)
Resolving url honk:test as prefix honk ... done
Running the transform accesses the same parser context again, but since it now
-needs to resolve the "hoi" URI in the call to the document function, its
-"honk" resolver will fail to do so::
+needs to resolve the ``hoi`` URI in the call to the document function, its
+``honk`` resolver will fail to do so::
>>> result = transform(normal_doc)
Traceback (most recent call last):
@@ -146,7 +147,7 @@
[...]
XSLTApplyError: Cannot resolve URI hoi:test
-This can only be solved by adding a "hoi" resolver to the parser. Note that
+This can only be solved by adding a ``hoi`` resolver to the parser. Note that
adding it after parsing the XSL document will not work as parsed documents
remember the state of the parser at the time of their creation::
@@ -169,10 +170,10 @@
hoi-TEST
-We can see that the "hoi" resolver was called to generate a document that was
-then inserted into the result document by the XSLT transformation. Note that
-this is completely independent of the XML file you transform, as the URI is
-resolved from within the stylesheet context::
+We can see that the ``hoi`` resolver was called to generate a document that
+was then inserted into the result document by the XSLT transformation. Note
+that this is completely independent of the XML file you transform, as the URI
+is resolved from within the stylesheet context::
>>> result = transform(normal_doc)
Resolving url hoi:test as prefix honk ... failed
From scoder at codespeak.net Tue May 2 08:56:01 2006
From: scoder at codespeak.net (scoder@codespeak.net)
Date: Tue May 2 08:56:03 2006
Subject: [Lxml-checkins] r26652 - in lxml/trunk: doc src/lxml src/lxml/tests
Message-ID: <20060502065601.63F42100A7@code0.codespeak.net>
Author: scoder
Date: Tue May 2 08:55:58 2006
New Revision: 26652
Modified:
lxml/trunk/doc/api.txt
lxml/trunk/src/lxml/etree.pyx
lxml/trunk/src/lxml/tests/test_etree.py
Log:
make ElementTree.doctype return a DocType object instead of a tuple, enables "<!DOCTYPE ...>" string building via str()
Modified: lxml/trunk/doc/api.txt
==============================================================================
--- lxml/trunk/doc/api.txt (original)
+++ lxml/trunk/doc/api.txt Tue May 2 08:55:58 2006
@@ -58,6 +58,24 @@
>>> print lxml.etree.tostring(html)
test page title
+The use of the libxml2 parsers makes some additional information available at
+the API level. Currently, ElementTree objects can access the DOCTYPE
+information provided by a parsed document::
+
+ >>> pub_id = "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ >>> sys_url = "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"
+ >>> doctype_string = '<!DOCTYPE html PUBLIC "%s" "%s">' % (pub_id, sys_url)
+ >>> xhtml = doctype_string + '<html><body></body></html>'
+
+ >>> et = lxml.etree.parse(StringIO(xhtml))
+ >>> doctype = et.doctype
+ >>> print doctype.public_id
+ -//W3C//DTD XHTML 1.0 Transitional//EN
+ >>> print doctype.system_url
+ http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd
+ >>> str(doctype) == doctype_string
+ True
+
Error handling on exceptions
----------------------------
Modified: lxml/trunk/src/lxml/etree.pyx
==============================================================================
--- lxml/trunk/src/lxml/etree.pyx (original)
+++ lxml/trunk/src/lxml/etree.pyx Tue May 2 08:55:58 2006
@@ -131,6 +131,7 @@
cdef getdoctype(self):
cdef tree.xmlDtd* dtd
+ cdef xmlNode* c_root_node
public_id = None
sys_url = None
dtd = self._c_doc.intSubset
@@ -145,7 +146,12 @@
public_id = funicode(dtd.ExternalID)
if not sys_url and dtd.SystemID is not NULL:
sys_url = funicode(dtd.SystemID)
- return (public_id, sys_url)
+ c_root_node = tree.xmlDocGetRootElement(self._c_doc)
+ if c_root_node is NULL:
+ root_name = None
+ else:
+ root_name = funicode(c_root_node.name)
+ return (root_name, public_id, sys_url)
cdef buildNewPrefix(self):
ns = python.PyString_FromFormat("ns%d", self._ns_counter)
@@ -215,7 +221,33 @@
parser = __DEFAULT_PARSER
result._parser = parser.copy()
return result
-
+
+cdef class DocType:
+ "Hold Public ID and System URL of a DOCTYPE declaration."
+ cdef readonly object root_name
+ cdef readonly object public_id
+ cdef readonly object system_url
+ def __init__(self, tree):
+ cdef _Document doc
+ doc = _documentOrRaise(tree)
+ self.root_name, self.public_id, self.system_url = doc.getdoctype()
+ if not self.root_name and (self.public_id or self.system_url):
+ raise ValueError, "Could not find root node"
+
+ def __str__(self):
+ if self.public_id:
+ if self.system_url:
+ return '<!DOCTYPE %s PUBLIC "%s" "%s">' % (
+ self.root_name, self.public_id, self.system_url)
+ else:
+ return '<!DOCTYPE %s PUBLIC "%s">' % (
+ self.root_name, self.public_id)
+ elif self.system_url:
+ return '<!DOCTYPE %s SYSTEM "%s">' % (
+ self.root_name, self.system_url)
+ else:
+ return ""
+
cdef class _NodeBase:
"""Base class to reference a document object and a libxml node.
@@ -259,7 +291,7 @@
those returned by the parse functions).
"""
def __get__(self):
- return self._doc.getdoctype()
+ return DocType(self._doc)
def write(self, file, encoding='us-ascii'):
if not hasattr(file, 'write'):
Modified: lxml/trunk/src/lxml/tests/test_etree.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_etree.py (original)
+++ lxml/trunk/src/lxml/tests/test_etree.py Tue May 2 08:55:58 2006
@@ -384,30 +384,39 @@
etree = self.etree
pub_id = "-//W3C//DTD XHTML 1.0 Transitional//EN"
sys_id = "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"
- xml = '''\
-
-
-''' % (pub_id, sys_id)
+ doctype_string = '<!DOCTYPE html PUBLIC "%s" "%s">' % (pub_id, sys_id)
+
+ xml = doctype_string + '<html><body></body></html>'
tree = etree.parse(StringIO(xml))
- self.assertEquals(tree.doctype, (pub_id, sys_id))
+ doctype = tree.doctype
+ self.assertEquals(doctype.public_id, pub_id)
+ self.assertEquals(doctype.system_url, sys_id)
+ self.assertEquals(doctype.root_name, 'html')
+ self.assertEquals(str(doctype), doctype_string)
def test_doctype_system(self):
etree = self.etree
sys_id = "some.dtd"
- xml = '''\
-
-
-''' % sys_id
+ doctype_string = '<!DOCTYPE html SYSTEM "%s">' % sys_id
+ xml = doctype_string + '<html><body></body></html>'
tree = etree.parse(StringIO(xml))
- self.assertEquals(tree.doctype, (None, sys_id))
+ doctype = tree.doctype
+ self.assertEquals(doctype.public_id, None)
+ self.assertEquals(doctype.system_url, sys_id)
+ self.assertEquals(doctype.root_name, 'html')
+ self.assertEquals(str(doctype), doctype_string)
def test_doctype_empty(self):
etree = self.etree
xml = ''
tree = etree.parse(StringIO(xml))
- self.assertEquals(tree.doctype, (None, None))
+ doctype = tree.doctype
+ self.assertEquals(doctype.public_id, None)
+ self.assertEquals(doctype.system_url, None)
+ self.assertEquals(doctype.root_name, 'html')
+ self.assertEquals(str(doctype), '')
def _writeElement(self, element, encoding='us-ascii'):
"""Write out element for comparison.
From scoder at codespeak.net Tue May 2 09:01:38 2006
From: scoder at codespeak.net (scoder@codespeak.net)
Date: Tue May 2 09:01:39 2006
Subject: [Lxml-checkins] r26653 - in lxml/trunk: . src/lxml
Message-ID: <20060502070138.94411100A7@code0.codespeak.net>
Author: scoder
Date: Tue May 2 09:01:37 2006
New Revision: 26653
Modified:
lxml/trunk/CHANGES.txt
lxml/trunk/src/lxml/etree.pyx
Log:
doc updates
Modified: lxml/trunk/CHANGES.txt
==============================================================================
--- lxml/trunk/CHANGES.txt (original)
+++ lxml/trunk/CHANGES.txt Tue May 2 09:01:37 2006
@@ -7,8 +7,8 @@
Features added
--------------
-* Read-only 'doctype' attribute in ElementTree class that holds a tuple
- (public ID, system URL) as seen by the parser
+* Read-only 'doctype' attribute in ElementTree class holds DOCTYPE information
+ as seen by the parser
* etree module can be compiled without libxslt by commenting out the line
'include "xslt.pxi"' at the end of the etree.pyx source file
Modified: lxml/trunk/src/lxml/etree.pyx
==============================================================================
--- lxml/trunk/src/lxml/etree.pyx (original)
+++ lxml/trunk/src/lxml/etree.pyx Tue May 2 09:01:37 2006
@@ -228,6 +228,7 @@
cdef readonly object public_id
cdef readonly object system_url
def __init__(self, tree):
+ "Create a DocType object for an ElementTree object or root Element."
cdef _Document doc
doc = _documentOrRaise(tree)
self.root_name, self.public_id, self.system_url = doc.getdoctype()
From scoder at codespeak.net Tue May 2 09:14:46 2006
From: scoder at codespeak.net (scoder@codespeak.net)
Date: Tue May 2 09:14:46 2006
Subject: [Lxml-checkins] r26654 - lxml/trunk/doc
Message-ID: <20060502071446.380AB100A0@code0.codespeak.net>
Author: scoder
Date: Tue May 2 09:14:44 2006
New Revision: 26654
Modified:
lxml/trunk/doc/compatibility.txt
Log:
doc updates
Modified: lxml/trunk/doc/compatibility.txt
==============================================================================
--- lxml/trunk/doc/compatibility.txt (original)
+++ lxml/trunk/doc/compatibility.txt Tue May 2 09:14:44 2006
@@ -2,7 +2,7 @@
=============================
A lot of care has been taken to ensure compatibility between etree and
-ElementTree. Nonetheless some differences and incompatibilities exist:
+ElementTree. Nonetheless some differences and incompatibilities exist:
* Importing etree is obviously different; etree uses a lower case
package name, while ElementTree a combination of upper-case and
@@ -25,22 +25,22 @@
# use
from lxml import etree as ElementTree
-* Some of the API of ElementTree has not yet been implemented and is
- thus missing in lxml.etree. Feel free to help out!
+* Some minor parts of the API of ElementTree have not yet been implemented and
+ are thus missing in lxml.etree. Feel free to help out!
* Then again, lxml.etree offers a lot more functionality, such as
XPath, XSLT, Relax NG, and XML Schema support, which (c)ElementTree
does not offer.
* ElementTree allows you to place an Element in two different trees at the
- same time. Thus, this::
+ same time. Thus, this::
a = Element('a')
b = SubElement(a, 'b')
c = Element('c')
c.append(b)
- Will result in the following tree a::
+ will result in the following tree a::
@@ -48,11 +48,10 @@
- In lxml, this behavior is different, because of lxml is built on top
- of a tree that maintains parent relationships for elements (like W3C
- DOM). This means an element can only exist in a single tree at the
- same time. Adding an element in some tree to another tree will cause
- this element to be moved.
+ In lxml, this behavior is different, because lxml is built on top of a tree
+ that maintains parent relationships for elements (like W3C DOM). This means
+ an element can only exist in a single tree at the same time. Adding an
+ element in some tree to another tree will cause this element to be moved.
So, for tree a we will get::
@@ -62,44 +61,43 @@
- Unfortunately this is a rather fundamental difference in behavior,
- which will be hard to solve. It won't affect some applications, but
- if you want to port code you do unfortunately have to make sure that
- it doesn't.
-
-* ElementTree has a bug when serializing an empty Comment (no text
- argument given) to XML, etree serializes this successfully.
-
-* When trying to set a subelement using __setitem__ that is in fact
- not an Element but some other object, etree raises a TypeError, and
- ElementTree raises an AssertionError.
-
-* ElementTree ignores comments when parsing XML, while etree will read
- them in and treat them as Comment elements.
-
-* Because etree is built on top of libxml2, which is namespace prefix
- aware, etree preserves namespaces declarations and prefixes while
- ElementTree tends to come up with its own prefixes (ns0, ns1,
- etc). When no namespace prefix is given however, etree creates
- ElementTree style prefixes as well.
-
-* etree has a 'prefix' attribute (read-only) on elements giving the
- Element's prefix, if this is known, and None otherwise (in case of
- no namespace at all, or default namespace). etree also allows a
- 'nsmap' dictionary which maps namespace prefix to namespace URI to
- be passed to the Element and SubElement element factories.
-
- These will be translated into namespace declarations on that
- element. This means that in the probably rare case that you need to
- construct an attribute called 'nsmap', you need to be aware that
- unlike in ElementTree, you cannot pass it as a keyword argument to
- the Element and SubElement factories directly.
-
-* etree elements can be copied using copy.deepcopy() and copy.copy(),
- just like ElementTree's. copy.copy() however does *not* create a
- shallow copy where elements are shared between trees, as this makes
- no sense in the context of libxml2 trees.
+ Unfortunately this is a rather fundamental difference in behavior, which
+ will be hard to solve. It won't affect some applications, but if you want
+ to port code you must unfortunately make sure that it doesn't.
+
+* ElementTree has a bug when serializing an empty Comment (no text argument
+ given) to XML, etree serializes this successfully.
+
+* When trying to set a subelement using __setitem__ that is in fact not an
+ Element but some other object, etree raises a TypeError, and ElementTree
+ raises an AssertionError.
+
+* ElementTree ignores comments when parsing XML, while etree will read them in
+ and treat them as Comment elements.
+
+* Because etree is built on top of libxml2, which is namespace prefix aware,
+ etree preserves namespace declarations and prefixes while ElementTree tends
+ to come up with its own prefixes (ns0, ns1, etc). When no namespace prefix
+ is given however, etree creates ElementTree style prefixes as well.
+
+* etree has a 'prefix' attribute (read-only) on elements giving the Element's
+ prefix, if this is known, and None otherwise (in case of no namespace at
+ all, or default namespace).
+
+ etree further allows passing an 'nsmap' dictionary to the Element and
+ SubElement element factories to explicitly map namespace prefixes to
+ namespace URIs. These will be translated into namespace declarations on
+ that element. This means that in the probably rare case that you need to
+ construct an attribute called 'nsmap', you need to be aware that unlike in
+ ElementTree, you cannot pass it as a keyword argument to the Element and
+ SubElement factories directly.
+
+* etree elements can be copied using copy.deepcopy() and copy.copy(), just
+ like ElementTree's. copy.copy() however does *not* create a shallow copy
+ where elements are shared between trees, as this makes no sense in the
+ context of libxml2 trees. Note that lxml can deep-copy trees considerably
+ faster than ElementTree.
* etree allows navigation to the parent of a node by the ``getparent()``
- method. This is not possible in ElementTree as the underlying tree
- model does not have this information.
+ method. This is not possible in ElementTree as the underlying tree model
+ does not have this information.
From scoder at codespeak.net Tue May 2 13:44:04 2006
From: scoder at codespeak.net (scoder@codespeak.net)
Date: Tue May 2 13:44:05 2006
Subject: [Lxml-checkins] r26657 - lxml/trunk/src/lxml/tests
Message-ID: <20060502114404.A2267100B5@code0.codespeak.net>
Author: scoder
Date: Tue May 2 13:44:03 2006
New Revision: 26657
Modified:
lxml/trunk/src/lxml/tests/test_elementtree.py
Log:
stop running api.txt doctests twice (they should not be part of the ET test cases)
Modified: lxml/trunk/src/lxml/tests/test_elementtree.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_elementtree.py (original)
+++ lxml/trunk/src/lxml/tests/test_elementtree.py Tue May 2 13:44:03 2006
@@ -1891,8 +1891,6 @@
suite.addTests([unittest.makeSuite(ETreeTestCase)])
if ElementTree:
suite.addTests([unittest.makeSuite(ElementTreeTestCase)])
- suite.addTests(
- [doctest.DocFileSuite('../../../doc/api.txt')])
return suite
if __name__ == '__main__':
From scoder at codespeak.net Tue May 2 13:45:02 2006
From: scoder at codespeak.net (scoder@codespeak.net)
Date: Tue May 2 13:45:03 2006
Subject: [Lxml-checkins] r26658 - lxml/trunk/src/lxml
Message-ID: <20060502114502.AB696100B5@code0.codespeak.net>
Author: scoder
Date: Tue May 2 13:45:01 2006
New Revision: 26658
Modified:
lxml/trunk/src/lxml/etree.pyx
lxml/trunk/src/lxml/tree.pxd
Log:
allow accessing URL, XML version and original encoding through DocType object
Modified: lxml/trunk/src/lxml/etree.pyx
==============================================================================
--- lxml/trunk/src/lxml/etree.pyx (original)
+++ lxml/trunk/src/lxml/etree.pyx Tue May 2 13:45:01 2006
@@ -153,6 +153,25 @@
root_name = funicode(c_root_node.name)
return (root_name, public_id, sys_url)
+ cdef getxmlinfo(self):
+ cdef xmlDoc* c_doc
+ c_doc = self._c_doc
+ if c_doc.version is NULL:
+ version = None
+ else:
+ version = c_doc.version
+ if c_doc.encoding is NULL:
+ encoding = None
+ else:
+ encoding = c_doc.encoding
+ return (version, encoding)
+
+ cdef getURL(self):
+ if self._c_doc.URL is NULL:
+ return None
+ else:
+ return self._c_doc.URL
+
cdef buildNewPrefix(self):
ns = python.PyString_FromFormat("ns%d", self._ns_counter)
self._ns_counter = self._ns_counter + 1
@@ -227,6 +246,9 @@
cdef readonly object root_name
cdef readonly object public_id
cdef readonly object system_url
+ cdef readonly object xml_version
+ cdef readonly object encoding
+ cdef readonly object URL
def __init__(self, tree):
"Create a DocType object for an ElementTree object or root Element."
cdef _Document doc
@@ -234,6 +256,8 @@
self.root_name, self.public_id, self.system_url = doc.getdoctype()
if not self.root_name and (self.public_id or self.system_url):
raise ValueError, "Could not find root node"
+ self.xml_version, self.encoding = doc.getxmlinfo()
+ self.URL = doc.getURL()
def __str__(self):
if self.public_id:
Modified: lxml/trunk/src/lxml/tree.pxd
==============================================================================
--- lxml/trunk/src/lxml/tree.pxd (original)
+++ lxml/trunk/src/lxml/tree.pxd Tue May 2 13:45:01 2006
@@ -81,6 +81,8 @@
xmlDoc* doc
xmlDict* dict
xmlHashTable* ids
+ char* version
+ char* encoding
char* URL
void* _private
xmlDtd* intSubset
From scoder at codespeak.net Tue May 2 13:57:14 2006
From: scoder at codespeak.net (scoder@codespeak.net)
Date: Tue May 2 13:57:15 2006
Subject: [Lxml-checkins] r26660 - lxml/trunk/src/lxml
Message-ID: <20060502115714.5ABA6100AA@code0.codespeak.net>
Author: scoder
Date: Tue May 2 13:57:13 2006
New Revision: 26660
Modified:
lxml/trunk/src/lxml/extensions.pxi
lxml/trunk/src/lxml/python.pxd
Log:
C-ification in _unwrapXPathObject(bool)
Modified: lxml/trunk/src/lxml/extensions.pxi
==============================================================================
--- lxml/trunk/src/lxml/extensions.pxi (original)
+++ lxml/trunk/src/lxml/extensions.pxi Tue May 2 13:57:13 2006
@@ -227,7 +227,7 @@
elif xpathObj.type == xpath.XPATH_NODESET:
return _createNodeSetResult(xpathObj, doc)
elif xpathObj.type == xpath.XPATH_BOOLEAN:
- return bool(xpathObj.boolval)
+ return python.PyBool_FromLong(xpathObj.boolval)
elif xpathObj.type == xpath.XPATH_NUMBER:
return xpathObj.floatval
elif xpathObj.type == xpath.XPATH_STRING:
Modified: lxml/trunk/src/lxml/python.pxd
==============================================================================
--- lxml/trunk/src/lxml/python.pxd (original)
+++ lxml/trunk/src/lxml/python.pxd Tue May 2 13:57:13 2006
@@ -17,6 +17,7 @@
cdef object PyString_FromStringAndSize(char* s, int size)
cdef object PyString_FromString(char* s)
cdef object PyString_FromFormat(char* format, ...)
+ cdef object PyBool_FromLong(long value)
cdef int PyList_GET_SIZE(object l)
cdef int PyList_Append(object l, object obj)
From scoder at codespeak.net Tue May 2 13:57:43 2006
From: scoder at codespeak.net (scoder@codespeak.net)
Date: Tue May 2 13:57:44 2006
Subject: [Lxml-checkins] r26661 - lxml/trunk/doc
Message-ID: <20060502115743.73997100AA@code0.codespeak.net>
Author: scoder
Date: Tue May 2 13:57:42 2006
New Revision: 26661
Modified:
lxml/trunk/doc/api.txt
Log:
doc updates, more API doc tests
Modified: lxml/trunk/doc/api.txt
==============================================================================
--- lxml/trunk/doc/api.txt (original)
+++ lxml/trunk/doc/api.txt Tue May 2 13:57:42 2006
@@ -60,12 +60,14 @@
The use of the libxml2 parsers makes some additional information available at
the API level. Currently, ElementTree objects can access the DOCTYPE
-information provided by a parsed document::
+information provided by a parsed document, as well as the XML version and the
+original encoding::
>>> pub_id = "-//W3C//DTD XHTML 1.0 Transitional//EN"
>>> sys_url = "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"
>>> doctype_string = '' % (pub_id, sys_url)
- >>> xhtml = doctype_string + ''
+ >>> xml_header = ''
+ >>> xhtml = xml_header + doctype_string + ''
>>> et = lxml.etree.parse(StringIO(xhtml))
>>> doctype = et.doctype
@@ -76,6 +78,11 @@
>>> str(doctype) == doctype_string
True
+ >>> print doctype.xml_version
+ 1.0
+ >>> print doctype.encoding
+ ascii
+
Error handling on exceptions
----------------------------
@@ -109,23 +116,20 @@
xpath method on ElementTree, Element
------------------------------------
-lxml.etree extends the ElementTree and Element interfaces with an
-xpath method. For ElementTree, the xpath method performs a global
-xpath query against the document. When xpath is used on an element,
-the xpath expression is performed taking the element as the xpath
-context node.
-
-You call the xpath() method with the XPath expression to use, and
-optionally a second namespaces argument, which should be a dictionary
-mapping namespace prefixes to be used in the XPath expression to
-namespace URIs.
+lxml.etree extends the ElementTree and Element interfaces with an xpath
+method. For ElementTree, the xpath method performs a global xpath query
+against the document. When xpath is used on an element, the xpath expression
+is performed taking the element as the xpath context node.
+
+You call the xpath() method with the XPath expression to use. Optionally, you
+can provide a second argument, which should be a dictionary mapping the
+namespace prefixes used in the XPath expression to namespace URIs.
-The return values of xpath vary, depending on the XPath expression
-used:
+The return values of xpath vary, depending on the XPath expression used:
-* 1 or 0, when the XPath expression has a boolean result
+* True or False, when the XPath expression has a boolean result
-* a float, when the XPath expression has a floating point result
+* a float, when the XPath expression has a numeric result (integer or float)
* a (unicode) string, when the XPath expression has a string result.
@@ -189,7 +193,7 @@
>>> doc = lxml.etree.parse(f)
>>> result = transform(doc)
-The result object can accessed like a normal ElementTree document::
+The result object can be accessed like a normal ElementTree document::
>>> result.getroot().text
'Text'
@@ -230,9 +234,9 @@
'\nText \n'
There's also a convenience method on the tree object for doing XSL
-transformations. This is less efficient if you want to apply the same XSL
-transformation to multiple documents, but is shorter to write, as you do not
-have to instantiate a stylesheet yourself::
+transformations. This is less efficient if you want to apply the same XSL
+transformation to multiple documents, but is shorter to write for one-shot
+operations, as you do not have to instantiate a stylesheet yourself::
>>> result = doc.xslt(xslt_doc, a="'A'")
>>> str(result)
@@ -281,13 +285,18 @@
invalid!
If you prefer getting an exception when validating, you can use the
-assertValid method::
+``assert_`` or ``assertValid`` methods::
>>> relaxng.assertValid(doc2)
Traceback (most recent call last):
[...]
DocumentInvalid: Document does not comply with schema
+ >>> relaxng.assert_(doc2)
+ Traceback (most recent call last):
+ [...]
+ AssertionError: Document does not comply with schema
+
Starting with version 0.9, lxml now has a simple API to report the errors
generated by libxml2. If you want to find out why the validation failed in the
second case, you can look up the error log of the validation process and check
@@ -300,7 +309,9 @@
You can see that the error (ERROR) happened during RelaxNG validation
(RELAXNGV). The message then tells you what went wrong. Note that this error
is local to the RelaxNG object. It will only contain log entries that
-appeares during the validation.
+appeared during the validation. The DocumentInvalid exception raised by the
+``assertValid`` method above provides access to the global error log (like all
+other lxml exceptions).
Similar to XSLT, there's also a less efficient but easier shortcut method to
do one-shot RelaxNG validation::
@@ -356,13 +367,18 @@
invalid!
If you prefer getting an exception when validating, you can use the
-assertValid method::
+``assert_`` or ``assertValid`` methods::
>>> xmlschema.assertValid(doc2)
Traceback (most recent call last):
[...]
DocumentInvalid: Document does not comply with schema
+ >>> xmlschema.assert_(doc2)
+ Traceback (most recent call last):
+ [...]
+ AssertionError: Document does not comply with schema
+
Error reporting works like for the RelaxNG class::
>>> log = xmlschema.error_log
From scoder at codespeak.net Tue May 2 20:32:39 2006
From: scoder at codespeak.net (scoder@codespeak.net)
Date: Tue May 2 20:32:41 2006
Subject: [Lxml-checkins] r26682 - in lxml/trunk: . doc src/lxml
src/lxml/tests
Message-ID: <20060502183239.E5E36100BB@code0.codespeak.net>
Author: scoder
Date: Tue May 2 20:32:38 2006
New Revision: 26682
Modified:
lxml/trunk/CHANGES.txt
lxml/trunk/doc/api.txt
lxml/trunk/src/lxml/etree.pyx
lxml/trunk/src/lxml/tests/test_etree.py
Log:
renamed 'doctype' attribute to 'docinfo': more generic name for more generic information
Modified: lxml/trunk/CHANGES.txt
==============================================================================
--- lxml/trunk/CHANGES.txt (original)
+++ lxml/trunk/CHANGES.txt Tue May 2 20:32:38 2006
@@ -7,8 +7,8 @@
Features added
--------------
-* Read-only 'doctype' attribute in ElementTree class holds DOCTYPE information
- as seen by the parser
+* Read-only 'docinfo' attribute in ElementTree class holds DOCTYPE
+ information, original encoding and XML version as seen by the parser
* etree module can be compiled without libxslt by commenting out the line
'include "xslt.pxi"' at the end of the etree.pyx source file
Modified: lxml/trunk/doc/api.txt
==============================================================================
--- lxml/trunk/doc/api.txt (original)
+++ lxml/trunk/doc/api.txt Tue May 2 20:32:38 2006
@@ -70,17 +70,17 @@
>>> xhtml = xml_header + doctype_string + ''
>>> et = lxml.etree.parse(StringIO(xhtml))
- >>> doctype = et.doctype
- >>> print doctype.public_id
+ >>> docinfo = et.docinfo
+ >>> print docinfo.public_id
-//W3C//DTD XHTML 1.0 Transitional//EN
- >>> print doctype.system_url
+ >>> print docinfo.system_url
http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd
- >>> str(doctype) == doctype_string
+ >>> docinfo.doctype == doctype_string
True
- >>> print doctype.xml_version
+ >>> print docinfo.xml_version
1.0
- >>> print doctype.encoding
+ >>> print docinfo.encoding
ascii
Modified: lxml/trunk/src/lxml/etree.pyx
==============================================================================
--- lxml/trunk/src/lxml/etree.pyx (original)
+++ lxml/trunk/src/lxml/etree.pyx Tue May 2 20:32:38 2006
@@ -241,8 +241,8 @@
result._parser = parser.copy()
return result
-cdef class DocType:
- "Hold Public ID and System URL of a DOCTYPE declaration."
+cdef class DocInfo:
+ "Document information provided by parser and DTD."
cdef readonly object root_name
cdef readonly object public_id
cdef readonly object system_url
@@ -250,7 +250,7 @@
cdef readonly object encoding
cdef readonly object URL
def __init__(self, tree):
- "Create a DocType object for an ElementTree object or root Element."
+ "Create a DocInfo object for an ElementTree object or root Element."
cdef _Document doc
doc = _documentOrRaise(tree)
self.root_name, self.public_id, self.system_url = doc.getdoctype()
@@ -259,19 +259,20 @@
self.xml_version, self.encoding = doc.getxmlinfo()
self.URL = doc.getURL()
- def __str__(self):
- if self.public_id:
- if self.system_url:
- return '' % (
- self.root_name, self.public_id, self.system_url)
+ property doctype:
+ def __get__(self):
+ if self.public_id:
+ if self.system_url:
+ return '' % (
+ self.root_name, self.public_id, self.system_url)
+ else:
+ return '' % (
+ self.root_name, self.public_id)
+ elif self.system_url:
+ return '' % (
+ self.root_name, self.system_url)
else:
- return '' % (
- self.root_name, self.public_id)
- elif self.system_url:
- return '' % (
- self.root_name, self.system_url)
- else:
- return ""
+ return ""
cdef class _NodeBase:
"""Base class to reference a document object and a libxml node.
@@ -309,14 +310,13 @@
def getroot(self):
return self._context_node
- property doctype:
- """A tuple (public ID, system URL) of the DOCTYPE seen by the parser.
- Any of the two may be None. This value is only defined for
- ElementTree objects based on the root node of a parsed document (e.g.
- those returned by the parse functions).
+ property docinfo:
+ """Information about the document provided by parser and DTD. This
+ value is only defined for ElementTree objects based on the root node
+ of a parsed document (e.g. those returned by the parse functions).
"""
def __get__(self):
- return DocType(self._doc)
+ return DocInfo(self._doc)
def write(self, file, encoding='us-ascii'):
if not hasattr(file, 'write'):
Modified: lxml/trunk/src/lxml/tests/test_etree.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_etree.py (original)
+++ lxml/trunk/src/lxml/tests/test_etree.py Tue May 2 20:32:38 2006
@@ -380,43 +380,51 @@
self.assertEquals(
0, e.index(e[0], -12, -1))
- def test_doctype_public(self):
+ def test_docinfo_public(self):
etree = self.etree
+ xml_header = ''
pub_id = "-//W3C//DTD XHTML 1.0 Transitional//EN"
sys_id = "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"
doctype_string = '' % (pub_id, sys_id)
- xml = doctype_string + ''
+ xml = xml_header + doctype_string + ''
tree = etree.parse(StringIO(xml))
- doctype = tree.doctype
- self.assertEquals(doctype.public_id, pub_id)
- self.assertEquals(doctype.system_url, sys_id)
- self.assertEquals(doctype.root_name, 'html')
- self.assertEquals(str(doctype), doctype_string)
+ docinfo = tree.docinfo
+ self.assertEquals(docinfo.encoding, "ascii")
+ self.assertEquals(docinfo.xml_version, "1.0")
+ self.assertEquals(docinfo.public_id, pub_id)
+ self.assertEquals(docinfo.system_url, sys_id)
+ self.assertEquals(docinfo.root_name, 'html')
+ self.assertEquals(docinfo.doctype, doctype_string)
- def test_doctype_system(self):
+ def test_docinfo_system(self):
etree = self.etree
+ xml_header = ''
sys_id = "some.dtd"
doctype_string = '' % sys_id
- xml = doctype_string + ''
+ xml = xml_header + doctype_string + ''
tree = etree.parse(StringIO(xml))
- doctype = tree.doctype
- self.assertEquals(doctype.public_id, None)
- self.assertEquals(doctype.system_url, sys_id)
- self.assertEquals(doctype.root_name, 'html')
- self.assertEquals(str(doctype), doctype_string)
+ docinfo = tree.docinfo
+ self.assertEquals(docinfo.encoding, "UTF-8")
+ self.assertEquals(docinfo.xml_version, "1.0")
+ self.assertEquals(docinfo.public_id, None)
+ self.assertEquals(docinfo.system_url, sys_id)
+ self.assertEquals(docinfo.root_name, 'html')
+ self.assertEquals(docinfo.doctype, doctype_string)
- def test_doctype_empty(self):
+ def test_docinfo_empty(self):
etree = self.etree
xml = ''
tree = etree.parse(StringIO(xml))
- doctype = tree.doctype
- self.assertEquals(doctype.public_id, None)
- self.assertEquals(doctype.system_url, None)
- self.assertEquals(doctype.root_name, 'html')
- self.assertEquals(str(doctype), '')
+ docinfo = tree.docinfo
+ self.assertEquals(docinfo.encoding, None)
+ self.assertEquals(docinfo.xml_version, "1.0")
+ self.assertEquals(docinfo.public_id, None)
+ self.assertEquals(docinfo.system_url, None)
+ self.assertEquals(docinfo.root_name, 'html')
+ self.assertEquals(docinfo.doctype, '')
def _writeElement(self, element, encoding='us-ascii'):
"""Write out element for comparison.
From scoder at codespeak.net Wed May 3 09:10:53 2006
From: scoder at codespeak.net (scoder@codespeak.net)
Date: Wed May 3 09:10:55 2006
Subject: [Lxml-checkins] r26702 - lxml/trunk
Message-ID: <20060503071053.0F074100BB@code0.codespeak.net>
Author: scoder
Date: Wed May 3 09:10:51 2006
New Revision: 26702
Modified:
lxml/trunk/setup.py
Log:
check for SVN revision and store lxml version in lxml-version.h
Modified: lxml/trunk/setup.py
==============================================================================
--- lxml/trunk/setup.py (original)
+++ lxml/trunk/setup.py Wed May 3 09:10:51 2006
@@ -1,12 +1,33 @@
-import os
+import sys, os, os.path, re
def flags(cmd):
wf, rf, ef = os.popen3(cmd)
return rf.read().strip().split(' ')
+src_dir = os.path.join(os.getcwd(), os.path.dirname(sys.argv[0]))
+version = open(os.path.join(src_dir, 'version.txt')).read().strip()
+
+try:
+ svn_entries = open(os.path.join(src_dir, '.svn', 'entries')).read()
+except IOError:
+ svn_version = version
+else:
+ revision = re.search("]*name=\"\"[^>]*revision=\"([^\"]+)\"",
+ svn_entries).group(1)
+ svn_version = version + '-' + revision
+
+version_h = open(os.path.join(src_dir, 'src', 'lxml', 'lxml-version.h'), 'w')
+version_h.write('''\
+#ifndef LXML_VERSION_STRING
+#define LXML_VERSION_STRING "%s"
+#endif
+''' % svn_version)
+version_h.close()
+
+print "Building lxml version", svn_version
+
setup_args = {}
changelog_text = ""
-version = open('version.txt').read().strip()
try:
from setuptools import setup
From scoder at codespeak.net Wed May 3 09:12:57 2006
From: scoder at codespeak.net (scoder@codespeak.net)
Date: Wed May 3 09:13:00 2006
Subject: [Lxml-checkins] r26703 - lxml/trunk/src/lxml
Message-ID: <20060503071257.EF3F9100BB@code0.codespeak.net>
Author: scoder
Date: Wed May 3 09:12:56 2006
New Revision: 26703
Modified:
lxml/trunk/src/lxml/etree.pyx
lxml/trunk/src/lxml/tree.pxd
lxml/trunk/src/lxml/xslt.pxd
lxml/trunk/src/lxml/xslt.pxi
Log:
provide versions of lxml/libxml2/libxslt at module level
Modified: lxml/trunk/src/lxml/etree.pyx
==============================================================================
--- lxml/trunk/src/lxml/etree.pyx (original)
+++ lxml/trunk/src/lxml/etree.pyx Wed May 3 09:12:56 2006
@@ -56,6 +56,32 @@
class C14NError(LxmlError):
pass
+# version information
+cdef __unpackDottedVersion(version):
+ version_list = []
+ l = (version.replace('-', '.').split('.') + [0]*4)[:4]
+ for item in l:
+ try:
+ version_list.append(int(item))
+ except ValueError:
+ version_list.append(item)
+ return tuple(version_list)
+
+cdef __unpackIntVersion(int c_version):
+ return (
+ ((c_version / (100*100)) % 100),
+ ((c_version / 100) % 100),
+ (c_version % 100)
+ )
+
+LIBXML_COMPILED_VERSION = __unpackIntVersion(tree.LIBXML_VERSION)
+try:
+ LIBXML_VERSION = __unpackIntVersion(
+ int((tree.xmlParserVersion).split('-')[0]))
+except Exception:
+ LIBXML_VERSION = (0,0,0)
+LXML_VERSION = __unpackDottedVersion(tree.LXML_VERSION_STRING)
+
# class for temporary storage of Python references
cdef class _TempStore:
Modified: lxml/trunk/src/lxml/tree.pxd
==============================================================================
--- lxml/trunk/src/lxml/tree.pxd (original)
+++ lxml/trunk/src/lxml/tree.pxd Wed May 3 09:12:56 2006
@@ -5,7 +5,14 @@
cdef int strlen(char* s)
cdef int strcmp(char* s1, char* s2)
cdef int strncmp(char* s1, char* s2, int len)
-
+
+cdef extern from "lxml-version.h":
+ cdef char* LXML_VERSION_STRING
+
+cdef extern from "libxml/xmlversion.h":
+ cdef char* xmlParserVersion
+ cdef int LIBXML_VERSION
+
cdef extern from "libxml/encoding.h":
ctypedef struct xmlCharEncodingHandler
cdef xmlCharEncodingHandler* xmlFindCharEncodingHandler(char* name)
Modified: lxml/trunk/src/lxml/xslt.pxd
==============================================================================
--- lxml/trunk/src/lxml/xslt.pxd (original)
+++ lxml/trunk/src/lxml/xslt.pxd Wed May 3 09:12:56 2006
@@ -1,6 +1,12 @@
from tree cimport xmlDoc, xmlDict
from xpath cimport xmlXPathContext, xmlXPathFunction
+cdef extern from "libxslt/xslt.h":
+ cdef int xsltLibxsltVersion
+
+cdef extern from "libxslt/xsltconfig.h":
+ cdef int LIBXSLT_VERSION
+
cdef extern from "libxslt/xsltInternals.h":
ctypedef struct xsltDocument:
xmlDoc* doc
Modified: lxml/trunk/src/lxml/xslt.pxi
==============================================================================
--- lxml/trunk/src/lxml/xslt.pxi (original)
+++ lxml/trunk/src/lxml/xslt.pxi Wed May 3 09:12:56 2006
@@ -17,6 +17,9 @@
class XSLTExtensionError(XSLTError):
pass
+# version information
+LIBXSLT_COMPILED_VERSION = __unpackIntVersion(xslt.LIBXSLT_VERSION)
+LIBXSLT_VERSION = __unpackIntVersion(xslt.xsltLibxsltVersion)
cdef void _logLibxsltErrors():
xslt.xsltSetGenericErrorFunc(NULL, _receiveGenericError)
From scoder at codespeak.net Wed May 3 09:21:49 2006
From: scoder at codespeak.net (scoder@codespeak.net)
Date: Wed May 3 09:21:51 2006
Subject: [Lxml-checkins] r26705 - lxml/trunk/src/lxml
Message-ID: <20060503072149.73146100BB@code0.codespeak.net>
Author: scoder
Date: Wed May 3 09:21:47 2006
New Revision: 26705
Modified:
lxml/trunk/src/lxml/parser.pxi
Log:
pass 'recover' option to _handleParseResult explicitly to override libxml2 parse options (relying on them fails on certain libxml2 versions)
Modified: lxml/trunk/src/lxml/parser.pxi
==============================================================================
--- lxml/trunk/src/lxml/parser.pxi (original)
+++ lxml/trunk/src/lxml/parser.pxi Wed May 3 09:21:47 2006
@@ -147,9 +147,9 @@
c_ctxt._private = self._context
cdef xmlDoc* _handleParseResult(xmlParserCtxt* ctxt, xmlDoc* result,
- char* c_filename) except NULL:
+ char* c_filename, int recover) except NULL:
cdef _ResolverContext context
- if ctxt.wellFormed or (ctxt.options & xmlparser.XML_PARSE_RECOVER):
+ if ctxt.wellFormed or recover:
__GLOBAL_PARSER_CONTEXT._initDocDict(result)
elif result is not NULL:
# free broken document
@@ -254,7 +254,7 @@
"""
cdef xmlDoc* result
cdef xmlParserCtxt* pctxt
- cdef int parse_error
+ cdef int recover
self._error_log.connect()
pctxt = self._memory_parser_ctxt
if pctxt is NULL:
@@ -264,11 +264,13 @@
result = xmlparser.xmlCtxtReadDoc(
pctxt, c_text, NULL, NULL, self._parse_options)
self._error_log.disconnect()
- return _handleParseResult(pctxt, result, NULL)
+ recover = self._parse_options & xmlparser.XML_PARSE_RECOVER
+ return _handleParseResult(pctxt, result, NULL, recover)
cdef xmlDoc* _parseDocFromFile(self, char* c_filename) except NULL:
cdef xmlDoc* result
cdef xmlParserCtxt* pctxt
+ cdef int recover
self._error_log.connect()
pctxt = self._file_parser_ctxt
if pctxt is NULL:
@@ -278,13 +280,15 @@
result = xmlparser.xmlCtxtReadFile(
pctxt, c_filename, NULL, self._parse_options)
self._error_log.disconnect()
- return _handleParseResult(pctxt, result, c_filename)
+ recover = self._parse_options & xmlparser.XML_PARSE_RECOVER
+ return _handleParseResult(pctxt, result, c_filename, recover)
cdef xmlDoc* _internalParseDoc(char* c_text, int options,
_ResolverContext context) except NULL:
# internal parser function for XSLT
cdef xmlParserCtxt* pctxt
cdef xmlDoc* c_doc
+ cdef int recover
pctxt = xmlparser.xmlNewParserCtxt()
if pctxt is NULL:
return NULL
@@ -293,7 +297,8 @@
c_doc = xmlparser.xmlCtxtReadDoc(
pctxt, c_text, NULL, NULL, options)
try:
- c_doc = _handleParseResult(pctxt, c_doc, NULL)
+ recover = options & xmlparser.XML_PARSE_RECOVER
+ c_doc = _handleParseResult(pctxt, c_doc, NULL, recover)
finally:
xmlparser.xmlFreeParserCtxt(pctxt)
return c_doc
@@ -303,6 +308,7 @@
# internal parser function for XSLT
cdef xmlParserCtxt* pctxt
cdef xmlDoc* c_doc
+ cdef int recover
pctxt = xmlparser.xmlNewParserCtxt()
if pctxt is NULL:
return NULL
@@ -311,7 +317,8 @@
c_doc = xmlparser.xmlCtxtReadFile(
pctxt, c_filename, NULL, options)
try:
- c_doc = _handleParseResult(pctxt, c_doc, c_filename)
+ recover = options & xmlparser.XML_PARSE_RECOVER
+ c_doc = _handleParseResult(pctxt, c_doc, c_filename, recover)
finally:
xmlparser.xmlFreeParserCtxt(pctxt)
return c_doc
@@ -400,6 +407,7 @@
cdef xmlDoc* result
cdef xmlParserCtxt* pctxt
cdef int c_len
+ cdef int recover
self._error_log.connect()
pctxt = self._memory_parser_ctxt
if pctxt is NULL:
@@ -412,12 +420,13 @@
result = htmlparser.htmlCtxtReadDoc(
pctxt, c_text, NULL, NULL, self._parse_options)
self._error_log.disconnect()
- return _handleParseResult(pctxt, result, NULL)
+ recover = self._parse_options & xmlparser.XML_PARSE_RECOVER
+ return _handleParseResult(pctxt, result, NULL, recover)
cdef xmlDoc* _parseDocFromFile(self, char* c_filename) except NULL:
cdef xmlDoc* result
cdef xmlParserCtxt* pctxt
- cdef int parser_error
+ cdef int recover
self._error_log.connect()
pctxt = self._file_parser_ctxt
if pctxt is NULL:
@@ -433,7 +442,8 @@
result = htmlparser.htmlCtxtReadFile(
pctxt, c_filename, NULL, self._parse_options)
self._error_log.disconnect()
- return _handleParseResult(pctxt, result, c_filename)
+ recover = self._parse_options & xmlparser.XML_PARSE_RECOVER
+ return _handleParseResult(pctxt, result, c_filename, recover)
cdef HTMLParser __DEFAULT_HTML_PARSER
__DEFAULT_HTML_PARSER = HTMLParser()
From scoder at codespeak.net Wed May 3 09:48:41 2006
From: scoder at codespeak.net (scoder@codespeak.net)
Date: Wed May 3 09:48:42 2006
Subject: [Lxml-checkins] r26707 - lxml/trunk/src/lxml
Message-ID: <20060503074841.EA90A100C6@code0.codespeak.net>
Author: scoder
Date: Wed May 3 09:48:40 2006
New Revision: 26707
Modified:
lxml/trunk/src/lxml/parser.pxi
Log:
allow setting 'recover' option in XMLParser()
Modified: lxml/trunk/src/lxml/parser.pxi
==============================================================================
--- lxml/trunk/src/lxml/parser.pxi (original)
+++ lxml/trunk/src/lxml/parser.pxi Wed May 3 09:48:40 2006
@@ -208,7 +208,8 @@
cdef xmlParserCtxt* _file_parser_ctxt
cdef xmlParserCtxt* _memory_parser_ctxt
def __init__(self, attribute_defaults=False, dtd_validation=False,
- load_dtd=False, no_network=False, ns_clean=False):
+ load_dtd=False, no_network=False, ns_clean=False,
+ recover=False):
cdef int parse_options
self._file_parser_ctxt = NULL
BaseParser.__init__(self)
@@ -226,6 +227,8 @@
parse_options = parse_options | xmlparser.XML_PARSE_NONET
if ns_clean:
parse_options = parse_options | xmlparser.XML_PARSE_NSCLEAN
+ if recover:
+ parse_options = parse_options | xmlparser.XML_PARSE_RECOVER
self._parse_options = parse_options
From scoder at codespeak.net Wed May 3 09:59:07 2006
From: scoder at codespeak.net (scoder@codespeak.net)
Date: Wed May 3 09:59:07 2006
Subject: [Lxml-checkins] r26708 - lxml/trunk/src/lxml/tests
Message-ID: <20060503075907.26E2A100C6@code0.codespeak.net>
Author: scoder
Date: Wed May 3 09:59:06 2006
New Revision: 26708
Modified:
lxml/trunk/src/lxml/tests/test_htmlparser.py
Log:
updated test cases for broken HTML
Modified: lxml/trunk/src/lxml/tests/test_htmlparser.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_htmlparser.py (original)
+++ lxml/trunk/src/lxml/tests/test_htmlparser.py Wed May 3 09:59:06 2006
@@ -15,7 +15,7 @@
etree = etree
html_str = "test page title "
- broken_html_str = "test page title"
+ broken_html_str = "test page title"
def tearDown(self):
self.etree.set_default_parser()
@@ -32,6 +32,13 @@
self.assertRaises(self.etree.XMLSyntaxError,
parse, f, parser)
+ def test_module_parse_html_norecover(self):
+ parser = self.etree.HTMLParser(recover=False)
+ parse = self.etree.parse
+ f = StringIO(self.broken_html_str)
+ self.assertRaises(self.etree.XMLSyntaxError,
+ parse, f, parser)
+
def test_module_HTML_broken(self):
element = self.etree.HTML(self.broken_html_str)
self.assertEqual(self.etree.tostring(element),
@@ -39,8 +46,7 @@
def test_module_HTML_access(self):
element = self.etree.HTML(self.html_str)
- element = element[0][0]
- self.assertEqual(element.tag, 'title')
+ self.assertEqual(element[0][0].tag, 'title')
def test_module_parse_html(self):
parser = self.etree.HTMLParser()
From scoder at codespeak.net Wed May 3 10:37:45 2006
From: scoder at codespeak.net (scoder@codespeak.net)
Date: Wed May 3 10:37:46 2006
Subject: [Lxml-checkins] r26710 - in lxml/trunk: . doc
Message-ID: <20060503083745.47A0D100C6@code0.codespeak.net>
Author: scoder
Date: Wed May 3 10:37:43 2006
New Revision: 26710
Modified:
lxml/trunk/INSTALL.txt
lxml/trunk/doc/api.txt
Log:
updated doctest for parsing broken HTML, mention that libxml2 2.6.21 is needed for this to work well
Modified: lxml/trunk/INSTALL.txt
==============================================================================
--- lxml/trunk/INSTALL.txt (original)
+++ lxml/trunk/INSTALL.txt Wed May 3 10:37:43 2006
@@ -8,12 +8,16 @@
You need libxml2 and libxslt, in particular:
-* libxml 2.6.16 (newer versions are recommended). It can be found here:
+* libxml 2.6.16 or later. It can be found here:
http://xmlsoft.org/downloads.html
-* libxslt 1.1.12 (newer versions are recommended). It can be found here:
+* libxslt 1.1.12 or later. It can be found here:
http://xmlsoft.org/XSLT/downloads.html
+Newer versions generally contain fewer bugs and are therefore recommended. The
+HTML parser benefits from libxml2 version 2.6.21 or later, which supports
+parsing horribly broken HTML.
+
For Windows, there is a `binary distribution`_ of libxml2 and libxslt. Note
that you need both libxml2 and libxslt, as well as iconv and zlib. You can
then install the `binary egg distribution`_ of lxml (see below).
Modified: lxml/trunk/doc/api.txt
==============================================================================
--- lxml/trunk/doc/api.txt (original)
+++ lxml/trunk/doc/api.txt Wed May 3 10:37:43 2006
@@ -41,9 +41,11 @@
HTML parsing is similarly simple. The parsers have a ``recover`` keyword
argument that the HTMLParser sets by default. It lets libxml2 try its best to
-return something usable without raising an exception::
+return something usable without raising an exception. Note that this
+functionality depends entirely on libxml2. You should use libxml2 version
+2.6.21 or newer to take advantage of this feature::
- >>> broken_html = "test page title"
+ >>> broken_html = "test page title"
>>> parser = lxml.etree.HTMLParser()
>>> et = lxml.etree.parse(StringIO(broken_html), parser)
From scoder at codespeak.net Wed May 3 11:00:31 2006
From: scoder at codespeak.net (scoder@codespeak.net)
Date: Wed May 3 11:00:32 2006
Subject: [Lxml-checkins] r26713 - lxml/trunk/doc
Message-ID: <20060503090031.5400C100BF@code0.codespeak.net>
Author: scoder
Date: Wed May 3 11:00:30 2006
New Revision: 26713
Modified:
lxml/trunk/doc/api.txt
Log:
doc updates: getting lxml version through API
Modified: lxml/trunk/doc/api.txt
==============================================================================
--- lxml/trunk/doc/api.txt (original)
+++ lxml/trunk/doc/api.txt Wed May 3 11:00:30 2006
@@ -2,22 +2,29 @@
APIs specific to lxml
=====================
-lxml tries to follow established APIs wherever possible. Sometimes
-however the need to expose a feature in an easy way led to the
-invention of a new API.
+lxml tries to follow established APIs wherever possible. Sometimes, however,
+the need to expose a feature in an easy way led to the invention of a new API.
lxml.etree
==========
-lxml.etree tries to follow the etree API wherever it can. There are
-however some incompatibilities (see compatibility.txt). There are also
-some extensions.
+lxml.etree tries to follow the `ElementTree API`_ wherever it can. There are,
+however, some incompatibilities (see compatibility.txt). The extensions are
+documented here.
+
+.. _`ElementTree API`: http://effbot.org/zone/element-index.htm
The following examples usually assume this to be executed first::
>>> import lxml.etree
>>> from StringIO import StringIO
+If you need to know which version of lxml is installed, you can access the
+``lxml.etree.LXML_VERSION`` attribute to retrieve a version tuple. Note,
+however, that it did not exist before version 1.0, so you will get an
+AttributeError in older versions. The versions of libxml2 and libxslt are
+available through the attributes ``LIBXML_VERSION`` and ``LIBXSLT_VERSION``.
+
Parsers
-------
From scoder at codespeak.net Wed May 3 15:05:21 2006
From: scoder at codespeak.net (scoder@codespeak.net)
Date: Wed May 3 15:05:23 2006
Subject: [Lxml-checkins] r26722 - lxml/trunk/src/lxml/tests
Message-ID: <20060503130521.31DED10088@code0.codespeak.net>
Author: scoder
Date: Wed May 3 15:05:19 2006
New Revision: 26722
Modified:
lxml/trunk/src/lxml/tests/test_xslt.py
Log:
fix broken XSLT test case: should test for broken XSLT, not broken XML
Modified: lxml/trunk/src/lxml/tests/test_xslt.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_xslt.py (original)
+++ lxml/trunk/src/lxml/tests/test_xslt.py Wed May 3 15:05:19 2006
@@ -94,10 +94,10 @@
def test_xslt_broken(self):
tree = self.parse(' ')
style = self.parse('''\
-
- ''')
+''')
self.assertRaises(etree.XSLTParseError,
etree.XSLT, style)
From scoder at codespeak.net Wed May 3 17:06:35 2006
From: scoder at codespeak.net (scoder@codespeak.net)
Date: Wed May 3 17:06:37 2006
Subject: [Lxml-checkins] r26725 - lxml/trunk/src/lxml
Message-ID: <20060503150635.E0C0B100A8@code0.codespeak.net>
Author: scoder
Date: Wed May 3 17:06:32 2006
New Revision: 26725
Modified:
lxml/trunk/src/lxml/tree.pxd
lxml/trunk/src/lxml/xmlerror.pxi
lxml/trunk/src/lxml/xmlparser.pxd
Log:
check error message arguments in _receiveGenericError()
Modified: lxml/trunk/src/lxml/tree.pxd
==============================================================================
--- lxml/trunk/src/lxml/tree.pxd (original)
+++ lxml/trunk/src/lxml/tree.pxd Wed May 3 17:06:32 2006
@@ -3,6 +3,7 @@
cdef extern from "stdio.h":
ctypedef struct FILE
cdef int strlen(char* s)
+ cdef char* strstr(char* haystack, char* needle)
cdef int strcmp(char* s1, char* s2)
cdef int strncmp(char* s1, char* s2, int len)
Modified: lxml/trunk/src/lxml/xmlerror.pxi
==============================================================================
--- lxml/trunk/src/lxml/xmlerror.pxi (original)
+++ lxml/trunk/src/lxml/xmlerror.pxi Wed May 3 17:06:32 2006
@@ -267,27 +267,45 @@
log_handler = __GLOBAL_ERROR_LOG
cstd.va_start(args, msg)
- c_text = cstd.va_charptr(args)
- c_filename = cstd.va_charptr(args)
- c_line = cstd.va_int(args)
- c_element = cstd.va_charptr(args)
- cstd.va_end(args)
-
- if c_text is NULL:
- message = None
- elif c_element is NULL:
- message = funicode(c_text)
+ if tree.strncmp(msg, '%s:', 3) == 0:
+ c_text = cstd.va_charptr(args)
+ else:
+ c_text = NULL
+ if tree.strstr(msg, 'file %s') is not NULL:
+ c_filename = cstd.va_charptr(args)
+ else:
+ c_filename = NULL
+ if tree.strstr(msg, 'line %d') is not NULL:
+ c_line = cstd.va_int(args)
+ else:
+ c_line = -1
+ if tree.strstr(msg, 'element %s') is not NULL:
+ c_element = cstd.va_charptr(args)
else:
- message = "%s (element '%s')" % (
- funicode(c_text), funicode(c_element))
+ c_element = NULL
+ cstd.va_end(args)
- if c_filename is not NULL and tree.strlen(c_filename) > 0:
- if tree.strncmp(c_filename, 'XSLT:', 5) == 0:
- filename = ''
+ try:
+ if c_text is NULL:
+ message = None
+ elif c_element is NULL:
+ message = funicode(c_text)
else:
- filename = funicode(c_filename)
- else:
- filename = None
+ message = "%s (element '%s')" % (
+ funicode(c_text), funicode(c_element))
+ except UnicodeDecodeError:
+ message = ""
+
+ try:
+ if c_filename is not NULL and tree.strlen(c_filename) > 0:
+ if tree.strncmp(c_filename, 'XSLT:', 5) == 0:
+ filename = ''
+ else:
+ filename = funicode(c_filename)
+ else:
+ filename = None
+ except UnicodeDecodeError:
+ filename = ""
log_handler._receiveGeneric(xmlerror.XML_FROM_XSLT,
xmlerror.XML_ERR_OK,
@@ -306,6 +324,9 @@
# init global logging
initThreadLogging()
+# switch on line number reporting
+xmlparser.xmlLineNumbersDefault(1)
+
################################################################################
## CONSTANTS FROM "xmlerror.pxd"
################################################################################
Modified: lxml/trunk/src/lxml/xmlparser.pxd
==============================================================================
--- lxml/trunk/src/lxml/xmlparser.pxd (original)
+++ lxml/trunk/src/lxml/xmlparser.pxd Wed May 3 17:06:32 2006
@@ -39,9 +39,15 @@
#XML_PARSE_COMPACT = 65536 # compact small text nodes
cdef void xmlInitParser()
+ cdef int xmlLineNumbersDefault(int onoff)
cdef xmlParserCtxt* xmlNewParserCtxt()
+ cdef int xmlCtxtUseOptions(xmlParserCtxt* ctxt, int options)
cdef void xmlFreeParserCtxt(xmlParserCtxt* ctxt)
-
+ cdef int xmlCtxtResetPush(xmlParserCtxt* ctxt,
+ char* chunk, int size,
+ char* filename, char* encoding)
+ cdef int xmlParseChunk(xmlParserCtxt* ctxt,
+ char* chunk, int size, int terminate)
cdef xmlDoc* xmlCtxtReadDoc(xmlParserCtxt* ctxt,
char* cur, char* URL, char* encoding,
int options)
From scoder at codespeak.net Wed May 3 17:40:12 2006
From: scoder at codespeak.net (scoder@codespeak.net)
Date: Wed May 3 17:40:14 2006
Subject: [Lxml-checkins] r26726 - in lxml/trunk/src/lxml: . tests
Message-ID: <20060503154012.02F811008F@code0.codespeak.net>
Author: scoder
Date: Wed May 3 17:40:10 2006
New Revision: 26726
Modified:
lxml/trunk/src/lxml/etree.pyx
lxml/trunk/src/lxml/htmlparser.pxd
lxml/trunk/src/lxml/parser.pxi
lxml/trunk/src/lxml/tests/common_imports.py
lxml/trunk/src/lxml/tests/test_io.py
Log:
rewrite of file-like object reading in XML parser
* use StringIO.getvalue() for StringIO objects iff we read from the start
* use libxml2 chunk parser for any other file-like object to avoid reading the whole string into memory
* test with LargeFileLike object
* try to provide file URLs to parser wherever possible
Modified: lxml/trunk/src/lxml/etree.pyx
==============================================================================
--- lxml/trunk/src/lxml/etree.pyx (original)
+++ lxml/trunk/src/lxml/etree.pyx Wed May 3 17:40:10 2006
@@ -1357,12 +1357,12 @@
def HTML(text):
cdef _Document doc
- doc = _parseMemoryDocument(text, __DEFAULT_HTML_PARSER)
+ doc = _parseMemoryDocument(text, None, __DEFAULT_HTML_PARSER)
return doc.getroot()
def XML(text):
cdef _Document doc
- doc = _parseMemoryDocument(text, __DEFAULT_XML_PARSER)
+ doc = _parseMemoryDocument(text, None, __DEFAULT_XML_PARSER)
return doc.getroot()
fromstring = XML
Modified: lxml/trunk/src/lxml/htmlparser.pxd
==============================================================================
--- lxml/trunk/src/lxml/htmlparser.pxd (original)
+++ lxml/trunk/src/lxml/htmlparser.pxd Wed May 3 17:40:10 2006
@@ -13,14 +13,14 @@
# HTML_PARSE_RECOVER # Relaxed parsing
# HTML_PARSE_COMPACT # compact small text nodes
- xmlParserCtxt* htmlCreateMemoryParserCtxt(char* buffer, int size)
- xmlParserCtxt* htmlCreateFileParserCtxt(char* filename, char* encoding)
- void htmlFreeParserCtxt(xmlParserCtxt* ctxt)
- int htmlParseDocument(xmlParserCtxt* ctxt)
+ cdef xmlParserCtxt* htmlCreateMemoryParserCtxt(char* buffer, int size)
+ cdef xmlParserCtxt* htmlCreateFileParserCtxt(char* filename, char* encoding)
+ cdef void htmlFreeParserCtxt(xmlParserCtxt* ctxt)
+ cdef int htmlParseDocument(xmlParserCtxt* ctxt)
- xmlDoc* htmlCtxtReadFile(xmlParserCtxt* ctxt,
- char* filename, char* encoding,
- int options)
- xmlDoc* htmlCtxtReadDoc(xmlParserCtxt* ctxt,
- char* buffer, char* URL, char* encoding,
- int options)
+ cdef xmlDoc* htmlCtxtReadFile(xmlParserCtxt* ctxt,
+ char* filename, char* encoding,
+ int options)
+ cdef xmlDoc* htmlCtxtReadDoc(xmlParserCtxt* ctxt,
+ char* buffer, char* URL, char* encoding,
+ int options)
Modified: lxml/trunk/src/lxml/parser.pxi
==============================================================================
--- lxml/trunk/src/lxml/parser.pxi (original)
+++ lxml/trunk/src/lxml/parser.pxi Wed May 3 17:40:10 2006
@@ -191,6 +191,9 @@
xmlparser.XML_PARSE_NOERROR
)
+cdef object __FILE_READ_CHUNK_SIZE
+__FILE_READ_CHUNK_SIZE = 32768
+
cdef class XMLParser(BaseParser):
"""The XML parser. Parsers can be supplied as additional argument to
various parse functions of the lxml API. A default parser is always
@@ -207,6 +210,7 @@
cdef int _parse_options
cdef xmlParserCtxt* _file_parser_ctxt
cdef xmlParserCtxt* _memory_parser_ctxt
+ cdef xmlParserCtxt* _push_parser_ctxt
def __init__(self, attribute_defaults=False, dtd_validation=False,
load_dtd=False, no_network=False, ns_clean=False,
recover=False):
@@ -237,6 +241,8 @@
xmlparser.xmlFreeParserCtxt(self._file_parser_ctxt)
if self._memory_parser_ctxt != NULL:
xmlparser.xmlFreeParserCtxt(self._memory_parser_ctxt)
+ if self._push_parser_ctxt != NULL:
+ xmlparser.xmlFreeParserCtxt(self._push_parser_ctxt)
def copy(self):
cdef XMLParser parser
@@ -252,7 +258,7 @@
raise ParserError, "Failed to create parser context"
return pctxt
- cdef xmlDoc* _parseDoc(self, char* c_text) except NULL:
+ cdef xmlDoc* _parseDoc(self, char* c_text, char* c_filename) except NULL:
"""Parse document, share dictionary if possible.
"""
cdef xmlDoc* result
@@ -265,7 +271,7 @@
self._memory_parser_ctxt = pctxt
self._initContext(pctxt)
result = xmlparser.xmlCtxtReadDoc(
- pctxt, c_text, NULL, NULL, self._parse_options)
+ pctxt, c_text, c_filename, NULL, self._parse_options)
self._error_log.disconnect()
recover = self._parse_options & xmlparser.XML_PARSE_RECOVER
return _handleParseResult(pctxt, result, NULL, recover)
@@ -286,6 +292,53 @@
recover = self._parse_options & xmlparser.XML_PARSE_RECOVER
return _handleParseResult(pctxt, result, c_filename, recover)
+ cdef xmlDoc* _parseDocFromFilelike(self, filelike,
+ char* c_filename) except NULL:
+ cdef xmlDoc* result
+ cdef xmlParserCtxt* pctxt
+ cdef int recover
+ cdef int success
+ self._error_log.connect()
+ pctxt = self._push_parser_ctxt
+ if pctxt is NULL:
+ pctxt = self._createContext()
+ self._push_parser_ctxt = pctxt
+ self._initContext(pctxt)
+ result = NULL
+ success = xmlparser.xmlCtxtResetPush(pctxt, NULL, 0, c_filename, NULL)
+ if success != 0:
+ self._error_log.disconnect()
+ raise ParserError, "Failed to setup parser context"
+ xmlparser.xmlCtxtUseOptions(pctxt, self._parse_options)
+
+ try:
+ read = filelike.read
+ data = read(__FILE_READ_CHUNK_SIZE)
+ if python.PyUnicode_Check(data):
+ data = _stripDeclaration(_utf8(data))
+ while data:
+ if python.PyUnicode_Check(data):
+ data = _utf8(data)
+ elif not python.PyString_Check(data):
+ raise TypeError, "File-like objects must return string or unicode"
+ success = xmlparser.xmlParseChunk(pctxt, _cstr(data), len(data), 0)
+ if success != 0:
+ return _handleParseResult(pctxt, NULL, c_filename, 0)
+ data = read(__FILE_READ_CHUNK_SIZE)
+ xmlparser.xmlParseChunk(pctxt, NULL, 0, 1)
+ except Exception:
+ if pctxt.myDoc is not NULL:
+ tree.xmlFreeDoc(pctxt.myDoc)
+ pctxt.myDoc = NULL
+ self._error_log.disconnect()
+ raise
+
+ self._error_log.disconnect()
+ result = pctxt.myDoc
+ pctxt.myDoc = NULL
+ recover = self._parse_options & xmlparser.XML_PARSE_RECOVER
+ return _handleParseResult(pctxt, result, c_filename, recover)
+
cdef xmlDoc* _internalParseDoc(char* c_text, int options,
_ResolverContext context) except NULL:
# internal parser function for XSLT
@@ -404,7 +457,7 @@
parser._parse_options = self._parse_options
return parser
- cdef xmlDoc* _parseDoc(self, char* c_text) except NULL:
+ cdef xmlDoc* _parseDoc(self, char* c_text, char* c_filename) except NULL:
"""Parse HTML document, share dictionary if possible.
"""
cdef xmlDoc* result
@@ -421,7 +474,7 @@
self._memory_parser_ctxt = pctxt
self._initContext(pctxt)
result = htmlparser.htmlCtxtReadDoc(
- pctxt, c_text, NULL, NULL, self._parse_options)
+ pctxt, c_text, c_filename, NULL, self._parse_options)
self._error_log.disconnect()
recover = self._parse_options & xmlparser.XML_PARSE_RECOVER
return _handleParseResult(pctxt, result, NULL, recover)
@@ -455,14 +508,19 @@
## helper functions for document creation
############################################################
-cdef xmlDoc* _parseDoc(text_utf, parser) except NULL:
+cdef xmlDoc* _parseDoc(text_utf, filename, parser) except NULL:
+ cdef char* c_filename
if parser is None:
parser = __DEFAULT_PARSER
__GLOBAL_PARSER_CONTEXT._initParser()
+ if not filename:
+ c_filename = NULL
+ else:
+ c_filename = _cstr(filename)
if isinstance(parser, XMLParser):
- return (parser)._parseDoc(_cstr(text_utf))
+ return (parser)._parseDoc(_cstr(text_utf), c_filename)
elif isinstance(parser, HTMLParser):
- return (parser)._parseDoc(_cstr(text_utf))
+ return (parser)._parseDoc(_cstr(text_utf), c_filename)
else:
raise TypeError, "invalid parser"
@@ -477,6 +535,23 @@
else:
raise TypeError, "invalid parser"
+cdef xmlDoc* _parseDocFromFilelike(source, filename, parser) except NULL:
+ cdef char* c_filename
+ if parser is None:
+ parser = __DEFAULT_PARSER
+ __GLOBAL_PARSER_CONTEXT._initParser()
+ if not filename:
+ c_filename = NULL
+ else:
+ c_filename = _cstr(filename)
+ if isinstance(parser, XMLParser):
+ return (parser)._parseDocFromFilelike(source, c_filename)
+ elif isinstance(parser, HTMLParser):
+ data = source.read()
+ return (parser)._parseDoc(_cstr(data), c_filename)
+ else:
+ raise TypeError, "invalid parser"
+
cdef xmlDoc* _newDoc():
cdef xmlDoc* result
result = tree.xmlNewDoc("1.0")
@@ -490,9 +565,14 @@
cdef _Document _parseDocument(source, parser):
cdef xmlDoc* c_doc
filename = _getFilenameForFile(source)
+ if hasattr(source, 'getvalue') and hasattr(source, 'tell'):
+ # StringIO - reading from start?
+ if source.tell() == 0:
+ return _parseMemoryDocument(source.getvalue(), filename, parser)
+
# Support for unamed file-like object (StringIO, urlgrabber.urlopen, ...)
if not filename and hasattr(source, 'read'):
- return _parseMemoryDocument(source.read(), parser)
+ return _parseFilelikeDocument(source, filename, parser)
# Otherwise parse the file directly from the filesystem
if filename is None:
@@ -501,10 +581,14 @@
c_doc = _parseDocFromFile(_utf8(filename), parser)
return _documentFactory(c_doc, parser)
-cdef _Document _parseMemoryDocument(text, parser):
+cdef _Document _parseMemoryDocument(text, url, parser):
cdef xmlDoc* c_doc
if python.PyUnicode_Check(text):
text = _stripDeclaration(_utf8(text))
- c_doc = _parseDoc(text, parser)
+ c_doc = _parseDoc(text, url, parser)
return _documentFactory(c_doc, parser)
+cdef _Document _parseFilelikeDocument(source, filename, parser):
+ cdef xmlDoc* c_doc
+ c_doc = _parseDocFromFilelike(source, filename, parser)
+ return _documentFactory(c_doc, parser)
Modified: lxml/trunk/src/lxml/tests/common_imports.py
==============================================================================
--- lxml/trunk/src/lxml/tests/common_imports.py (original)
+++ lxml/trunk/src/lxml/tests/common_imports.py Wed May 3 17:40:10 2006
@@ -20,14 +20,57 @@
class SillyFileLike:
def __init__(self, xml_data=' '):
self.xml_data = xml_data
- self.done = False
def read(self, amount=None):
- if not self.done:
- self.done = True
- return self.xml_data
+ if self.xml_data:
+ if amount:
+ data = self.xml_data[:amount]
+ self.xml_data = self.xml_data[amount:]
+ else:
+ data = self.xml_data
+ self.xml_data = ''
+ return data
return ''
+class LargeFileLike:
+ def __init__(self, charlen=100, depth=4, children=10):
+ self.data = StringIO()
+ self.chars = 'a' * charlen
+ self.children = range(children)
+ self.more = self.iterelements(depth)
+
+ def iterelements(self, depth):
+ yield ''
+ depth -= 1
+ if depth > 0:
+ for child in self.children:
+ for element in self.iterelements(depth):
+ yield element
+ yield self.chars
+ else:
+ yield self.chars
+ yield ' '
+
+ def read(self, amount=None):
+ data = self.data
+ append = data.write
+ if amount:
+ for element in self.more:
+ append(element)
+ if data.tell() >= amount:
+ break
+ else:
+ for element in self.more:
+ append(element)
+ result = data.getvalue()
+ if amount:
+ self.data = StringIO(result[amount:])
+ result = result[:amount]
+ else:
+ data.seek(0)
+ data.truncate()
+ return result
+
def fileInTestDir(name):
_testdir = os.path.split(__file__)[0]
return os.path.join(_testdir, name)
Modified: lxml/trunk/src/lxml/tests/test_io.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_io.py (original)
+++ lxml/trunk/src/lxml/tests/test_io.py Wed May 3 17:40:10 2006
@@ -7,7 +7,8 @@
import unittest
import tempfile, gzip
-from common_imports import etree, ElementTree, fileInTestDir, SillyFileLike
+from common_imports import etree, ElementTree, fileInTestDir
+from common_imports import SillyFileLike, LargeFileLike
class IOTestCaseBase(unittest.TestCase):
"""(c)ElementTree compatibility for IO functions/methods
@@ -84,6 +85,29 @@
root = self.etree.ElementTree().parse(f)
self.assert_(root.tag.endswith('foo'))
+ def test_module_parse_large_fileobject(self):
+ # parse from unamed file object
+ f = LargeFileLike()
+ tree = self.etree.parse(f)
+ root = tree.getroot()
+ self.assert_(root.tag.endswith('root'))
+
+ def test_module_parse_fileobject_error(self):
+ class LocalError(Exception):
+ pass
+ class TestFile:
+ def read(*args):
+ raise LocalError
+ f = TestFile()
+ self.assertRaises(LocalError, self.etree.parse, f)
+
+ def test_module_parse_fileobject_type_error(self):
+ class TestFile:
+ def read(*args):
+ return 1
+ f = TestFile()
+ self.assertRaises(TypeError, self.etree.parse, f)
+
class ETreeIOTestCase(IOTestCaseBase):
etree = etree
From scoder at codespeak.net Wed May 3 17:47:34 2006
From: scoder at codespeak.net (scoder@codespeak.net)
Date: Wed May 3 17:47:36 2006
Subject: [Lxml-checkins] r26727 - lxml/trunk
Message-ID: <20060503154734.B81871008F@code0.codespeak.net>
Author: scoder
Date: Wed May 3 17:47:33 2006
New Revision: 26727
Modified:
lxml/trunk/CHANGES.txt
Log:
updated CHANGES.txt
Modified: lxml/trunk/CHANGES.txt
==============================================================================
--- lxml/trunk/CHANGES.txt (original)
+++ lxml/trunk/CHANGES.txt Wed May 3 17:47:33 2006
@@ -7,6 +7,10 @@
Features added
--------------
+* Parsing file-like objects now reads chunks rather than the whole file
+
+* Parsing StringIO objects from the start avoid copying the string
+
* Read-only 'docinfo' attribute in ElementTree class holds DOCTYPE
information, original encoding and XML version as seen by the parser
From scoder at codespeak.net Wed May 3 17:53:03 2006
From: scoder at codespeak.net (scoder@codespeak.net)
Date: Wed May 3 17:53:05 2006
Subject: [Lxml-checkins] r26728 - lxml/trunk/src/lxml/tests
Message-ID: <20060503155303.E66451008F@code0.codespeak.net>
Author: scoder
Date: Wed May 3 17:53:03 2006
New Revision: 26728
Modified:
lxml/trunk/src/lxml/tests/test_io.py
Log:
new test case for exception after parsing has started
Modified: lxml/trunk/src/lxml/tests/test_io.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_io.py (original)
+++ lxml/trunk/src/lxml/tests/test_io.py Wed May 3 17:53:03 2006
@@ -101,6 +101,23 @@
f = TestFile()
self.assertRaises(LocalError, self.etree.parse, f)
+ def test_module_parse_fileobject_late_error(self):
+ class LocalError(Exception):
+ pass
+ class TestFile:
+ data = 'test'
+ next_char = iter(data).next
+ counter = 0
+ def read(self, *args):
+ try:
+ self.counter += 1
+ return self.next_char()
+ except StopIteration:
+ raise LocalError
+ f = TestFile()
+ self.assertRaises(LocalError, self.etree.parse, f)
+ self.assertEquals(f.counter, len(f.data)+1)
+
def test_module_parse_fileobject_type_error(self):
class TestFile:
def read(*args):
From scoder at codespeak.net Wed May 3 17:56:03 2006
From: scoder at codespeak.net (scoder@codespeak.net)
Date: Wed May 3 17:56:04 2006
Subject: [Lxml-checkins] r26729 - lxml/trunk
Message-ID: <20060503155603.5A8C71008F@code0.codespeak.net>
Author: scoder
Date: Wed May 3 17:56:00 2006
New Revision: 26729
Modified:
lxml/trunk/CHANGES.txt
Log:
typo
Modified: lxml/trunk/CHANGES.txt
==============================================================================
--- lxml/trunk/CHANGES.txt (original)
+++ lxml/trunk/CHANGES.txt Wed May 3 17:56:00 2006
@@ -9,7 +9,7 @@
* Parsing file-like objects now reads chunks rather than the whole file
-* Parsing StringIO objects from the start avoid copying the string
+* Parsing StringIO objects from the start avoids copying the string
* Read-only 'docinfo' attribute in ElementTree class holds DOCTYPE
information, original encoding and XML version as seen by the parser
From scoder at codespeak.net Thu May 4 08:19:33 2006
From: scoder at codespeak.net (scoder@codespeak.net)
Date: Thu May 4 08:19:36 2006
Subject: [Lxml-checkins] r26748 - lxml/trunk
Message-ID: <20060504061933.DE359100A0@code0.codespeak.net>
Author: scoder
Date: Thu May 4 08:19:31 2006
New Revision: 26748
Modified:
lxml/trunk/CHANGES.txt
Log:
doc updates
Modified: lxml/trunk/CHANGES.txt
==============================================================================
--- lxml/trunk/CHANGES.txt (original)
+++ lxml/trunk/CHANGES.txt Thu May 4 08:19:31 2006
@@ -15,15 +15,15 @@
information, original encoding and XML version as seen by the parser
* etree module can be compiled without libxslt by commenting out the line
- 'include "xslt.pxi"' at the end of the etree.pyx source file
+ 'include "xslt.pxi"' near the end of the etree.pyx source file
* Error reporting now also works in XSLT
* Support for custom document loaders (URI resolvers) in parsers and XSLT,
resolvers are registered at parser level
-* Exslt:regexp implementation for XSLT based on the Python 're' module
- on by default, can be switched off with 'regexp=False' keyword argument
+* Implementation of exslt:regexp for XSLT based on the Python 're' module,
+ enabled by default, can be switched off with 'regexp=False' keyword argument
* Support for exslt extensions (libexslt) and node-set function
From scoder at codespeak.net Thu May 4 08:27:00 2006
From: scoder at codespeak.net (scoder@codespeak.net)
Date: Thu May 4 08:27:03 2006
Subject: [Lxml-checkins] r26749 - lxml/trunk/src/lxml
Message-ID: <20060504062700.13A5E10090@code0.codespeak.net>
Author: scoder
Date: Thu May 4 08:26:58 2006
New Revision: 26749
Modified:
lxml/trunk/src/lxml/parser.pxi
Log:
parse as file-like object even if filename is known (there might be a reason the user gave us a file-like)
Modified: lxml/trunk/src/lxml/parser.pxi
==============================================================================
--- lxml/trunk/src/lxml/parser.pxi (original)
+++ lxml/trunk/src/lxml/parser.pxi Thu May 4 08:26:58 2006
@@ -570,8 +570,8 @@
if source.tell() == 0:
return _parseMemoryDocument(source.getvalue(), filename, parser)
- # Support for unamed file-like object (StringIO, urlgrabber.urlopen, ...)
- if not filename and hasattr(source, 'read'):
+ # Support for file-like objects (urlgrabber.urlopen, ...)
+ if hasattr(source, 'read'):
return _parseFilelikeDocument(source, filename, parser)
# Otherwise parse the file directly from the filesystem
From scoder at codespeak.net Thu May 4 08:56:44 2006
From: scoder at codespeak.net (scoder@codespeak.net)
Date: Thu May 4 08:56:47 2006
Subject: [Lxml-checkins] r26750 - lxml/trunk/src/lxml
Message-ID: <20060504065644.DA45E10092@code0.codespeak.net>
Author: scoder
Date: Thu May 4 08:56:41 2006
New Revision: 26750
Modified:
lxml/trunk/src/lxml/apihelpers.pxi
Log:
support finding file-like URL for urllib2 handlers
Modified: lxml/trunk/src/lxml/apihelpers.pxi
==============================================================================
--- lxml/trunk/src/lxml/apihelpers.pxi (original)
+++ lxml/trunk/src/lxml/apihelpers.pxi Thu May 4 08:56:41 2006
@@ -348,6 +348,9 @@
# gzip file instances have a filename attribute
if hasattr(source, 'filename'):
return source.filename
+ # urllib2
+ if hasattr(source, 'geturl'):
+ return source.geturl()
return None
cdef void changeDocumentBelow(_NodeBase node, _Document doc, int recursive):
From scoder at codespeak.net Thu May 4 08:58:09 2006
From: scoder at codespeak.net (scoder@codespeak.net)
Date: Thu May 4 08:58:12 2006
Subject: [Lxml-checkins] r26751 - lxml/trunk/src/lxml
Message-ID: <20060504065809.2CC2610092@code0.codespeak.net>
Author: scoder
Date: Thu May 4 08:58:04 2006
New Revision: 26751
Modified:
lxml/trunk/src/lxml/parser.pxi
Log:
convert URLs/filenames to UTF-8 in document parser functions
Modified: lxml/trunk/src/lxml/parser.pxi
==============================================================================
--- lxml/trunk/src/lxml/parser.pxi (original)
+++ lxml/trunk/src/lxml/parser.pxi Thu May 4 08:58:04 2006
@@ -585,10 +585,14 @@
cdef xmlDoc* c_doc
if python.PyUnicode_Check(text):
text = _stripDeclaration(_utf8(text))
+ if url is not None:
+ url = _utf8(url)
c_doc = _parseDoc(text, url, parser)
return _documentFactory(c_doc, parser)
-cdef _Document _parseFilelikeDocument(source, filename, parser):
+cdef _Document _parseFilelikeDocument(source, url, parser):
cdef xmlDoc* c_doc
- c_doc = _parseDocFromFilelike(source, filename, parser)
+ if url is not None:
+ url = _utf8(url)
+ c_doc = _parseDocFromFilelike(source, url, parser)
return _documentFactory(c_doc, parser)
From scoder at codespeak.net Thu May 4 12:11:04 2006
From: scoder at codespeak.net (scoder@codespeak.net)
Date: Thu May 4 12:11:06 2006
Subject: [Lxml-checkins] r26755 - lxml/trunk/doc
Message-ID: <20060504101104.E129410092@code0.codespeak.net>
Author: scoder
Date: Thu May 4 12:11:03 2006
New Revision: 26755
Modified:
lxml/trunk/doc/extensions.txt
Log:
loads of restructuring and clarifications in doc/extensions.txt
Modified: lxml/trunk/doc/extensions.txt
==============================================================================
--- lxml/trunk/doc/extensions.txt (original)
+++ lxml/trunk/doc/extensions.txt Thu May 4 12:11:03 2006
@@ -9,13 +9,15 @@
Here is what such a function looks like. As the first argument, it always
receives a dummy object. It is currently None, but do not rely on this as it
may become meaningful in later versions of lxml. The other arguments are
-provided by the respective call in the XPath expression. Any number of
-arguments is allowed::
+provided by the respective call in the XPath expression, one in the following
+examples. Any number of arguments is allowed::
>>> def hello(dummy, a):
... return "Hello %s" % a
>>> def ola(dummy, a):
... return "Ola %s" % a
+ >>> def loadsofargs(dummy, *args):
+ ... return "Got %d arguments." % len(args)
The FunctionNamespace
@@ -29,10 +31,12 @@
>>> from lxml import etree
>>> ns = etree.FunctionNamespace(None)
>>> ns['hello'] = hello
+ >>> ns['countargs'] = loadsofargs
-This registers the function 'foo' with the name 'myfunction' in the default
-namespace. Now we're going to create a document that we can run XPath
-expressions against::
+This registers the function `hello` with the name `hello` in the default
+namespace (None), and the function `loadsofargs` with the name `countargs`.
+Now we're going to create a document that we can run XPath expressions
+against::
>>> from lxml import etree
>>> from StringIO import StringIO
@@ -48,15 +52,18 @@
Hello b
>>> print root.xpath('hello(string(b))')
Hello Haegar
+ >>> print root.xpath('countargs(., b, ./*)')
+ Got 3 arguments.
-Note how we call both a Python function (hello) and an XPath built-in function
-(local-name) in exactly the same way. Normally, however, you would want to
-separate the two in different namespaces. The FunctionNamespace class allows
-you to do this::
+Note how we call both a Python function (`hello`) and an XPath built-in
+function (`string`) in exactly the same way. Normally, however, you would
+want to separate the two in different namespaces. The FunctionNamespace class
+allows you to do this::
>>> ns = etree.FunctionNamespace('http://mydomain.org/myfunctions')
>>> ns['hello'] = hello
- >>> print root.xpath('f:hello(local-name(*))', {'f' : 'http://mydomain.org/myfunctions'})
+ >>> prefixmap = {'f' : 'http://mydomain.org/myfunctions'}
+ >>> print root.xpath('f:hello(local-name(*))', prefixmap)
Hello b
@@ -84,39 +91,11 @@
would rather complicate things than be of any help.
-What to return from a function
-------------------------------
-
-Extension functions can return any data type for which there is an XPath
-equivalent. This includes numbers, boolean values, elements and lists of
-elements::
-
- >>> def returnsFloat(_):
- ... return 1.7
- >>> def returnsBool(_):
- ... return True
- >>> def returnFirstNode(_, nodes):
- ... return nodes[0]
-
- >>> ns = etree.FunctionNamespace(None)
- >>> ns['float'] = returnsFloat
- >>> ns['bool'] = returnsBool
- >>> ns['first'] = returnFirstNode
-
- >>> e = etree.XPathEvaluator(doc)
- >>> e.evaluate("float()")
- 1.7
- >>> e.evaluate("bool()")
- True
- >>> e.evaluate("count(first(//b))")
- 1.0
-
-
Evaluators and XSLT
-------------------
Extension functions work for all ways of evaluating XPath expressions and for
-XSLT execution::
+XSL transformations::
>>> e = etree.XPathEvaluator(doc)
>>> print e.evaluate('es:hello(local-name(/a))')
@@ -142,64 +121,120 @@
It is also possible to register namespaces with a single evaluator. While the
following example involves no functions, the idea should still be clear::
- >>> f = StringIO(' ')
+ >>> f = StringIO(' ')
>>> ns_doc = etree.parse(f)
>>> e = etree.XPathEvaluator(ns_doc)
>>> e.evaluate('/a')
[]
-This obviously returns nothing, but when we register the namespace with the
-evaluator, we can access it via a prefix. Note that this prefix mapping is
-only known to this evaluator, as opposed to the global mapping of the
-FunctionNamespace objects::
+This returns nothing, as we did not ask for the right namespace. When we
+register the namespace with the evaluator, we can access it via a prefix::
>>> e.registerNamespace('foo', 'http://mydomain.org/myfunctions')
>>> e.evaluate('/foo:a')[0].tag
'{http://mydomain.org/myfunctions}a'
+Note that this prefix mapping is only known to this evaluator, as opposed to
+the global mapping of the FunctionNamespace objects::
+
+ >>> e2 = etree.XPathEvaluator(ns_doc)
+ >>> e2.evaluate('/foo:a')
+ Traceback (most recent call last):
+ ...
+ XPathSyntaxError: Error in xpath expression.
-BETA Features
--------------
-Note: the following features are still in beta state. They may not work as
-expected.
+What to return from a function
+------------------------------
+
+Extension functions can return any data type for which there is an XPath
+equivalent. This includes numbers, boolean values, elements and lists of
+elements. Note that integers will also be returned as floats::
+
+ >>> def returnsFloat(_):
+ ... return 1.7
+ >>> def returnsInteger(_):
+ ... return 1
+ >>> def returnsBool(_):
+ ... return True
+ >>> def returnFirstNode(_, nodes):
+ ... return nodes[0]
-It is possible to return lists of newly created nodes as XML structures::
+ >>> ns = etree.FunctionNamespace(None)
+ >>> ns['float'] = returnsFloat
+ >>> ns['int'] = returnsInteger
+ >>> ns['bool'] = returnsBool
+ >>> ns['first'] = returnFirstNode
- >>> def returnsNodeSet(evaluator):
- ... results = etree.Element('results')
- ... result = etree.SubElement(results, 'result')
- ... result.text = "Alpha"
- ... result2 = etree.SubElement(results, 'result')
- ... result2.text = "Beta"
- ... result3 = etree.SubElement(results, 'result')
- ... result3.text = "Gamma"
- ... return [results]
- >>> extension4 = { (None, 'returnsNodeSet') : returnsNodeSet }
- >>> e = etree.XPathEvaluator(doc, None, extensions=[extension4])
- >>> r = e.evaluate("returnsNodeSet()")
- >>> len(r)
+ >>> e = etree.XPathEvaluator(doc)
+ >>> e.evaluate("float()")
+ 1.7
+ >>> e.evaluate("int()")
+ 1.0
+ >>> int( e.evaluate("int()") )
1
- >>> t = r[0]
- >>> t.tag
- 'results'
- >>> len(t)
- 3
- >>> t[0].tag
- 'result'
- >>> t[0].text
+ >>> e.evaluate("bool()")
+ True
+ >>> e.evaluate("count(first(//b))")
+ 1.0
+
+As the last example shows, you can pass the results of functions back into
+the XPath expression. Elements and sequences of elements are treated as
+XPath node-sets::
+
+ >>> def returnsNodeSet(_):
+ ... results1 = etree.Element('results1')
+ ... etree.SubElement(results1, 'result').text = "Alpha"
+ ... etree.SubElement(results1, 'result').text = "Beta"
+ ...
+ ... results2 = etree.Element('results2')
+ ... etree.SubElement(results2, 'result').text = "Gamma"
+ ... etree.SubElement(results2, 'result').text = "Delta"
+ ...
+ ... results3 = etree.SubElement(results2, 'subresult')
+ ... return [results1, results2, results3]
+
+ >>> ns['new-node-set'] = returnsNodeSet
+
+ >>> e = etree.XPathEvaluator(doc, None)
+
+ >>> r = e.evaluate("new-node-set()/result")
+ >>> print [ t.text for t in r ]
+ ['Alpha', 'Beta', 'Gamma', 'Delta']
+
+ >>> r = e.evaluate("new-node-set()")
+ >>> print [ t.tag for t in r ]
+ ['results1', 'results2', 'subresult']
+ >>> print [ len(t) for t in r ]
+ [2, 3, 0]
+ >>> r[0][0].text
'Alpha'
- >>> t[1].text
- 'Beta'
-It's even possible to filter that result set with another XPath expression::
+ >>> print etree.tostring(r[0])
+ Alpha Beta
- >>> r = e.evaluate("returnsNodeSet()/result")
- >>> len(r)
- 3
- >>> r[0].tag
- 'result'
- >>> r[1].tag
- 'result'
- >>> r[0].text
- 'Alpha'
+ >>> print etree.tostring(r[1])
+ Gamma Delta
+
+ >>> print etree.tostring(r[2])
+
+
+The current implementation deep-copies newly created elements in node-sets.
+Only the elements and their children are passed on, no outlying parents or
+tail texts will be available in the result. This also means that in the above
+example, the `subresult` elements in `results2` and `results3` are no longer
+identical within the node-set, they belong to independent trees::
+
+ >>> print r[1][-1].tag, r[2].tag
+ subresult subresult
+ >>> print r[1][-1] == r[2]
+ False
+ >>> print r[1][-1].getparent().tag
+ results2
+ >>> print r[2].getparent()
+ None
+
+This is an implementation detail that you should be aware of, but you should
+avoid relying on it in your code. Note that elements taken from the source
+document (the most common case) do not suffer from this restriction. They
+will always be passed unchanged.
From scoder at codespeak.net Fri May 5 08:49:58 2006
From: scoder at codespeak.net (scoder@codespeak.net)
Date: Fri May 5 08:50:02 2006
Subject: [Lxml-checkins] r26788 - lxml/trunk/src/lxml/tests
Message-ID: <20060505064958.7CE8A10088@code0.codespeak.net>
Author: scoder
Date: Fri May 5 08:49:55 2006
New Revision: 26788
Modified:
lxml/trunk/src/lxml/tests/test_unicode.py
Log:
fix file encoding to use UTF-8
Modified: lxml/trunk/src/lxml/tests/test_unicode.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_unicode.py (original)
+++ lxml/trunk/src/lxml/tests/test_unicode.py Fri May 5 08:49:55 2006
@@ -1,4 +1,4 @@
-# -*- coding: UTF-8 -*-
+# -*- coding: utf-8 -*-
import unittest, doctest
from StringIO import StringIO
@@ -6,7 +6,7 @@
ascii_uni = u'a'
-uni = u'?\uF8D2' # klingon etc.
+uni = u'??\uF8D2' # klingon etc.
class UnicodeTestCase(unittest.TestCase):
def test_unicode_xml(self):
From scoder at codespeak.net Fri May 5 09:06:26 2006
From: scoder at codespeak.net (scoder@codespeak.net)
Date: Fri May 5 09:06:30 2006
Subject: [Lxml-checkins] r26789 - lxml/trunk/src/lxml/tests
Message-ID: <20060505070626.9103B10086@code0.codespeak.net>
Author: scoder
Date: Fri May 5 09:06:19 2006
New Revision: 26789
Modified:
lxml/trunk/src/lxml/tests/test_elementtree.py
lxml/trunk/src/lxml/tests/test_errors.py
lxml/trunk/src/lxml/tests/test_etree.py
lxml/trunk/src/lxml/tests/test_io.py
lxml/trunk/src/lxml/tests/test_nsclasses.py
lxml/trunk/src/lxml/tests/test_relaxng.py
lxml/trunk/src/lxml/tests/test_sax.py
lxml/trunk/src/lxml/tests/test_xmlschema.py
lxml/trunk/src/lxml/tests/test_xpathevaluator.py
lxml/trunk/src/lxml/tests/test_xslt.py
Log:
fix file encoding header for Emacs
Modified: lxml/trunk/src/lxml/tests/test_elementtree.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_elementtree.py (original)
+++ lxml/trunk/src/lxml/tests/test_elementtree.py Fri May 5 09:06:19 2006
@@ -1,4 +1,4 @@
-# -*- coding: UTF-8 -*-
+# -*- coding: utf-8 -*-
"""
Tests for the ElementTree API
Modified: lxml/trunk/src/lxml/tests/test_errors.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_errors.py (original)
+++ lxml/trunk/src/lxml/tests/test_errors.py Fri May 5 09:06:19 2006
@@ -1,4 +1,4 @@
-?# -*- coding: UTF-8 -*-
+?# -*- coding: utf-8 -*-
import unittest, doctest
# These tests check that error handling in the Pyrex code is
Modified: lxml/trunk/src/lxml/tests/test_etree.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_etree.py (original)
+++ lxml/trunk/src/lxml/tests/test_etree.py Fri May 5 09:06:19 2006
@@ -1,4 +1,4 @@
-# -*- coding: UTF-8 -*-
+# -*- coding: utf-8 -*-
"""
Tests specific to the extended etree API
Modified: lxml/trunk/src/lxml/tests/test_io.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_io.py (original)
+++ lxml/trunk/src/lxml/tests/test_io.py Fri May 5 09:06:19 2006
@@ -1,4 +1,4 @@
-# -*- coding: UTF-8 -*-
+# -*- coding: utf-8 -*-
"""
IO test cases that apply to both etree and ElementTree
Modified: lxml/trunk/src/lxml/tests/test_nsclasses.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_nsclasses.py (original)
+++ lxml/trunk/src/lxml/tests/test_nsclasses.py Fri May 5 09:06:19 2006
@@ -1,4 +1,4 @@
-# -*- coding: UTF-8 -*-
+# -*- coding: utf-8 -*-
"""
Test cases related to namespace implementation classes and the
Modified: lxml/trunk/src/lxml/tests/test_relaxng.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_relaxng.py (original)
+++ lxml/trunk/src/lxml/tests/test_relaxng.py Fri May 5 09:06:19 2006
@@ -1,4 +1,4 @@
-# -*- coding: UTF-8 -*-
+# -*- coding: utf-8 -*-
"""
Test cases related to RelaxNG parsing and validation
Modified: lxml/trunk/src/lxml/tests/test_sax.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_sax.py (original)
+++ lxml/trunk/src/lxml/tests/test_sax.py Fri May 5 09:06:19 2006
@@ -1,4 +1,4 @@
-# -*- coding: UTF-8 -*-
+# -*- coding: utf-8 -*-
"""
Test cases related to SAX I/O
Modified: lxml/trunk/src/lxml/tests/test_xmlschema.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_xmlschema.py (original)
+++ lxml/trunk/src/lxml/tests/test_xmlschema.py Fri May 5 09:06:19 2006
@@ -1,4 +1,4 @@
-# -*- coding: UTF-8 -*-
+# -*- coding: utf-8 -*-
"""
Test cases related to XML Schema parsing and validation
Modified: lxml/trunk/src/lxml/tests/test_xpathevaluator.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_xpathevaluator.py (original)
+++ lxml/trunk/src/lxml/tests/test_xpathevaluator.py Fri May 5 09:06:19 2006
@@ -1,4 +1,4 @@
-# -*- coding: UTF-8 -*-
+# -*- coding: utf-8 -*-
"""
Test cases related to XPath evaluation and the XPath class
Modified: lxml/trunk/src/lxml/tests/test_xslt.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_xslt.py (original)
+++ lxml/trunk/src/lxml/tests/test_xslt.py Fri May 5 09:06:19 2006
@@ -1,4 +1,4 @@
-# -*- coding: UTF-8 -*-
+# -*- coding: utf-8 -*-
"""
Test cases related to XSLT processing
From scoder at codespeak.net Fri May 5 09:15:16 2006
From: scoder at codespeak.net (scoder@codespeak.net)
Date: Fri May 5 09:15:18 2006
Subject: [Lxml-checkins] r26790 - in lxml/trunk: . src/lxml src/lxml/tests
Message-ID: <20060505071516.DF0AE10088@code0.codespeak.net>
Author: scoder
Date: Fri May 5 09:15:08 2006
New Revision: 26790
Modified:
lxml/trunk/CHANGES.txt
lxml/trunk/src/lxml/etree.pyx
lxml/trunk/src/lxml/parser.pxi
lxml/trunk/src/lxml/tests/test_htmlparser.py
Log:
only parse file-like objects on libxml2 >= 2.6.24 due to CRLF bug, fix UTF-8 conversion in HTML file-like parser
Modified: lxml/trunk/CHANGES.txt
==============================================================================
--- lxml/trunk/CHANGES.txt (original)
+++ lxml/trunk/CHANGES.txt Fri May 5 09:15:08 2006
@@ -7,7 +7,9 @@
Features added
--------------
-* Parsing file-like objects now reads chunks rather than the whole file
+* Parsing file-like objects now reads chunks rather than the whole file at
+ once (only on libxml2 >= 2.6.24, older versions have a bug with CRLF line
+ endings)
* Parsing StringIO objects from the start avoids copying the string
Modified: lxml/trunk/src/lxml/etree.pyx
==============================================================================
--- lxml/trunk/src/lxml/etree.pyx (original)
+++ lxml/trunk/src/lxml/etree.pyx Fri May 5 09:15:08 2006
@@ -74,12 +74,14 @@
(c_version % 100)
)
-LIBXML_COMPILED_VERSION = __unpackIntVersion(tree.LIBXML_VERSION)
+cdef int _LIBXML_VERSION_INT
try:
- LIBXML_VERSION = __unpackIntVersion(
- int((tree.xmlParserVersion).split('-')[0]))
+ _LIBXML_VERSION_INT = int((tree.xmlParserVersion).split('-')[0])
except Exception:
- LIBXML_VERSION = (0,0,0)
+ _LIBXML_VERSION_INT = 0
+
+LIBXML_VERSION = __unpackIntVersion(_LIBXML_VERSION_INT)
+LIBXML_COMPILED_VERSION = __unpackIntVersion(tree.LIBXML_VERSION)
LXML_VERSION = __unpackDottedVersion(tree.LXML_VERSION_STRING)
Modified: lxml/trunk/src/lxml/parser.pxi
==============================================================================
--- lxml/trunk/src/lxml/parser.pxi (original)
+++ lxml/trunk/src/lxml/parser.pxi Fri May 5 09:15:08 2006
@@ -547,8 +547,8 @@
if isinstance(parser, XMLParser):
return (parser)._parseDocFromFilelike(source, c_filename)
elif isinstance(parser, HTMLParser):
- data = source.read()
- return (parser)._parseDoc(_cstr(data), c_filename)
+ data_utf = _utf8(source.read())
+ return (parser)._parseDoc(_cstr(data_utf), c_filename)
else:
raise TypeError, "invalid parser"
@@ -594,5 +594,9 @@
cdef xmlDoc* c_doc
if url is not None:
url = _utf8(url)
- c_doc = _parseDocFromFilelike(source, url, parser)
- return _documentFactory(c_doc, parser)
+ # CRLF reading bug in libxml2 <= 2.6.23
+ if _LIBXML_VERSION_INT >= 20624:
+ c_doc = _parseDocFromFilelike(source, url, parser)
+ return _documentFactory(c_doc, parser)
+ else:
+ return _parseMemoryDocument(source.read(), url, parser)
Modified: lxml/trunk/src/lxml/tests/test_htmlparser.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_htmlparser.py (original)
+++ lxml/trunk/src/lxml/tests/test_htmlparser.py Fri May 5 09:15:08 2006
@@ -1,4 +1,4 @@
-# -*- coding: UTF-8 -*-
+# -*- coding: utf-8 -*-
"""
HTML parser test cases for etree
@@ -6,6 +6,7 @@
import unittest
import tempfile
+import re
from common_imports import StringIO, etree, fileInTestDir, SillyFileLike, HelperTestCase
@@ -16,6 +17,7 @@
html_str = "test page title "
broken_html_str = "test page title"
+ uhtml_str = u"test ??\uF8D2 page ??\uF8D2 title "
def tearDown(self):
self.etree.set_default_parser()
@@ -56,6 +58,15 @@
tree = self.etree.parse(f, parser)
self.assertEqual(self.etree.tostring(tree.getroot()), self.html_str)
+ def test_module_parse_html_filelike(self):
+ parser = self.etree.HTMLParser()
+ f = SillyFileLike(self.uhtml_str)
+ tree = self.etree.parse(f, parser)
+ html = self.etree.tostring(tree.getroot())
+ for entity_name, value in re.findall("(&#([0-9]+);)", html):
+ html = html.replace(entity_name, unichr(int(value)))
+ self.assertEqual(html, self.uhtml_str)
+
def test_html_file_error(self):
parser = self.etree.HTMLParser()
parse = self.etree.parse
From scoder at codespeak.net Fri May 5 10:21:13 2006
From: scoder at codespeak.net (scoder@codespeak.net)
Date: Fri May 5 10:21:15 2006
Subject: [Lxml-checkins] r26792 - lxml/trunk/src/lxml/tests
Message-ID: <20060505082113.715A110088@code0.codespeak.net>
Author: scoder
Date: Fri May 5 10:21:02 2006
New Revision: 26792
Modified:
lxml/trunk/src/lxml/tests/common_imports.py
lxml/trunk/src/lxml/tests/test_htmlparser.py
Log:
tests: move unentitify utility function to common_imports
Modified: lxml/trunk/src/lxml/tests/common_imports.py
==============================================================================
--- lxml/trunk/src/lxml/tests/common_imports.py (original)
+++ lxml/trunk/src/lxml/tests/common_imports.py Fri May 5 10:21:02 2006
@@ -1,6 +1,7 @@
import unittest
import os.path
from StringIO import StringIO
+import re
from lxml import etree
@@ -81,3 +82,8 @@
f = StringIO()
tree.write_c14n(f)
return f.getvalue()
+
+def unentitify(xml):
+ for entity_name, value in re.findall("(&#([0-9]+);)", xml):
+ xml = xml.replace(entity_name, unichr(int(value)))
+ return xml
Modified: lxml/trunk/src/lxml/tests/test_htmlparser.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_htmlparser.py (original)
+++ lxml/trunk/src/lxml/tests/test_htmlparser.py Fri May 5 10:21:02 2006
@@ -6,9 +6,9 @@
import unittest
import tempfile
-import re
-from common_imports import StringIO, etree, fileInTestDir, SillyFileLike, HelperTestCase
+from common_imports import StringIO, etree, fileInTestDir
+from common_imports import SillyFileLike, HelperTestCase, unentitify
class HtmlParserTestCaseBase(HelperTestCase):
"""HTML parser test cases
@@ -63,9 +63,7 @@
f = SillyFileLike(self.uhtml_str)
tree = self.etree.parse(f, parser)
html = self.etree.tostring(tree.getroot())
- for entity_name, value in re.findall("(&#([0-9]+);)", html):
- html = html.replace(entity_name, unichr(int(value)))
- self.assertEqual(html, self.uhtml_str)
+ self.assertEqual(unentitify(html), self.uhtml_str)
def test_html_file_error(self):
parser = self.etree.HTMLParser()
From scoder at codespeak.net Fri May 5 10:36:02 2006
From: scoder at codespeak.net (scoder@codespeak.net)
Date: Fri May 5 10:36:09 2006
Subject: [Lxml-checkins] r26793 - lxml/trunk/src/lxml/tests
Message-ID: <20060505083602.E3D0510088@code0.codespeak.net>
Author: scoder
Date: Fri May 5 10:35:51 2006
New Revision: 26793
Modified:
lxml/trunk/src/lxml/tests/test_elementtree.py
Log:
fix test_elementtree: when writing encoded XML, be sure to recode it to unicode before canonicalizing (API can't handle UTF-8 strings)
Modified: lxml/trunk/src/lxml/tests/test_elementtree.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_elementtree.py (original)
+++ lxml/trunk/src/lxml/tests/test_elementtree.py Fri May 5 10:35:51 2006
@@ -1816,7 +1816,7 @@
f = StringIO()
tree = ElementTree(element=element)
tree.write(f, encoding)
- data = f.getvalue()
+ data = unicode(f.getvalue(), encoding)
return canonicalize(data)
def _writeElementFile(self, element, encoding='us-ascii'):
@@ -1829,7 +1829,7 @@
tree.write(f, encoding)
f.close()
f = open(filename, 'rb')
- data = f.read()
+ data = unicode(f.read(), encoding)
f.close()
os.close(handle)
os.remove(filename)
From scoder at codespeak.net Fri May 5 10:43:21 2006
From: scoder at codespeak.net (scoder@codespeak.net)
Date: Fri May 5 10:43:28 2006
Subject: [Lxml-checkins] r26794 - lxml/trunk/src/lxml/tests
Message-ID: <20060505084321.12ADF10088@code0.codespeak.net>
Author: scoder
Date: Fri May 5 10:43:20 2006
New Revision: 26794
Modified:
lxml/trunk/src/lxml/tests/test_unicode.py
Log:
new test case for parsing unicode from file-like object
Modified: lxml/trunk/src/lxml/tests/test_unicode.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_unicode.py (original)
+++ lxml/trunk/src/lxml/tests/test_unicode.py Fri May 5 10:43:20 2006
@@ -1,13 +1,14 @@
# -*- coding: utf-8 -*-
import unittest, doctest
-from StringIO import StringIO
-from lxml import etree
+from common_imports import StringIO, etree, SillyFileLike, unentitify
ascii_uni = u'a'
uni = u'??\uF8D2' # klingon etc.
+uxml = u"test ??\uF8D2 page ??\uF8D2 title "
+
class UnicodeTestCase(unittest.TestCase):
def test_unicode_xml(self):
tree = etree.XML(u' %s
' % uni)
@@ -40,6 +41,12 @@
el = etree.parse(StringIO(u'%s
' % uni)).getroot()
self.assertEquals(uni, el.text)
+ def test_parse_fileobject_unicode(self):
+ # parse unicode from unamed file object (not support by ElementTree)
+ f = SillyFileLike(uxml)
+ root = etree.parse(f).getroot()
+ self.assertEquals(unentitify(etree.tostring(root)), uxml)
+
def test_suite():
suite = unittest.TestSuite()
suite.addTests([unittest.makeSuite(UnicodeTestCase)])
From scoder at codespeak.net Fri May 5 10:46:47 2006
From: scoder at codespeak.net (scoder@codespeak.net)
Date: Fri May 5 10:46:53 2006
Subject: [Lxml-checkins] r26795 - lxml/trunk/src/lxml/tests
Message-ID: <20060505084647.6C0C710088@code0.codespeak.net>
Author: scoder
Date: Fri May 5 10:46:45 2006
New Revision: 26795
Modified:
lxml/trunk/src/lxml/tests/test_io.py
Log:
new test case for libxml2 <= 2.6.23 CRLF bug, fix late_error test case
Modified: lxml/trunk/src/lxml/tests/test_io.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_io.py (original)
+++ lxml/trunk/src/lxml/tests/test_io.py Fri May 5 10:46:45 2006
@@ -85,6 +85,14 @@
root = self.etree.ElementTree().parse(f)
self.assert_(root.tag.endswith('foo'))
+ def test_class_parse_fileobject_crlf(self):
+ # libxml2 <= 2.6.23 has a bug reading CRLF files in chunks
+ xml = '' + 'test\r\n ' * 100 + ' '
+ f = SillyFileLike(xml)
+ root = self.etree.ElementTree().parse(f)
+ self.assertEquals(self.etree.tostring(root).replace('\r', ''),
+ xml.replace('\r', ''))
+
def test_module_parse_large_fileobject(self):
# parse from unamed file object
f = LargeFileLike()
@@ -108,12 +116,16 @@
data = 'test'
next_char = iter(data).next
counter = 0
- def read(self, *args):
- try:
- self.counter += 1
- return self.next_char()
- except StopIteration:
- raise LocalError
+ def read(self, amount=None):
+ if amount is None:
+ while True:
+ self.read(1)
+ else:
+ try:
+ self.counter += 1
+ return self.next_char()
+ except StopIteration:
+ raise LocalError
f = TestFile()
self.assertRaises(LocalError, self.etree.parse, f)
self.assertEquals(f.counter, len(f.data)+1)
From scoder at codespeak.net Fri May 5 10:49:02 2006
From: scoder at codespeak.net (scoder@codespeak.net)
Date: Fri May 5 10:49:03 2006
Subject: [Lxml-checkins] r26796 - in lxml/trunk: . src/lxml
Message-ID: <20060505084902.038B510088@code0.codespeak.net>
Author: scoder
Date: Fri May 5 10:49:00 2006
New Revision: 26796
Modified:
lxml/trunk/CHANGES.txt
lxml/trunk/src/lxml/etree.pyx
Log:
fix memory leak in write_c14n if it fails to write the file after conversion
Modified: lxml/trunk/CHANGES.txt
==============================================================================
--- lxml/trunk/CHANGES.txt (original)
+++ lxml/trunk/CHANGES.txt Fri May 5 10:49:00 2006
@@ -48,6 +48,8 @@
Bugs fixed
----------
+* Memory leak if write_c14n fails to write the file after conversion
+
* ElementTree.xpath() and XPathDocumentEvaluator were not using the
ElementTree root node as reference point
Modified: lxml/trunk/src/lxml/etree.pyx
==============================================================================
--- lxml/trunk/src/lxml/etree.pyx (original)
+++ lxml/trunk/src/lxml/etree.pyx Fri May 5 10:49:00 2006
@@ -479,10 +479,12 @@
if bytes < 0:
raise C14NError, "C14N failed"
- if not hasattr(file, 'write'):
- file = open(file, 'wb')
- file.write(data)
- tree.xmlFree(data)
+ try:
+ if not hasattr(file, 'write'):
+ file = open(file, 'wb')
+ file.write(data)
+ finally:
+ tree.xmlFree(data)
cdef _ElementTree _elementTreeFactory(_Document doc,
_NodeBase context_node):
From scoder at codespeak.net Fri May 5 10:54:39 2006
From: scoder at codespeak.net (scoder@codespeak.net)
Date: Fri May 5 10:54:42 2006
Subject: [Lxml-checkins] r26798 - lxml/trunk/src/lxml
Message-ID: <20060505085439.D700A10090@code0.codespeak.net>
Author: scoder
Date: Fri May 5 10:54:33 2006
New Revision: 26798
Modified:
lxml/trunk/src/lxml/parser.pxi
Log:
let parse functions always check the input type is string or unicode
Modified: lxml/trunk/src/lxml/parser.pxi
==============================================================================
--- lxml/trunk/src/lxml/parser.pxi (original)
+++ lxml/trunk/src/lxml/parser.pxi Fri May 5 10:54:33 2006
@@ -560,6 +560,7 @@
############################################################
## API level helper functions for _Document creation
+## (here we convert to UTF-8)
############################################################
cdef _Document _parseDocument(source, parser):
@@ -583,11 +584,12 @@
cdef _Document _parseMemoryDocument(text, url, parser):
cdef xmlDoc* c_doc
+ text_utf = _utf8(text)
if python.PyUnicode_Check(text):
- text = _stripDeclaration(_utf8(text))
+ text_utf = _stripDeclaration(text_utf)
if url is not None:
url = _utf8(url)
- c_doc = _parseDoc(text, url, parser)
+ c_doc = _parseDoc(text_utf, url, parser)
return _documentFactory(c_doc, parser)
cdef _Document _parseFilelikeDocument(source, url, parser):
From scoder at codespeak.net Fri May 5 11:00:00 2006
From: scoder at codespeak.net (scoder@codespeak.net)
Date: Fri May 5 11:00:03 2006
Subject: [Lxml-checkins] r26800 - lxml/trunk/src/lxml/tests
Message-ID: <20060505090000.2A78B10088@code0.codespeak.net>
Author: scoder
Date: Fri May 5 10:59:58 2006
New Revision: 26800
Modified:
lxml/trunk/src/lxml/tests/test_etree.py
lxml/trunk/src/lxml/tests/test_io.py
Log:
moved CRLF test case to test_etree since it's only for libxml2 anyway
Modified: lxml/trunk/src/lxml/tests/test_etree.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_etree.py (original)
+++ lxml/trunk/src/lxml/tests/test_etree.py Fri May 5 10:59:58 2006
@@ -10,9 +10,8 @@
import unittest, doctest
-from StringIO import StringIO
-
-from common_imports import etree, HelperTestCase, fileInTestDir, canonicalize
+from common_imports import etree, StringIO, HelperTestCase, fileInTestDir
+from common_imports import SillyFileLike, canonicalize
class ETreeOnlyTestCase(HelperTestCase):
"""Tests only for etree, not ElementTree"""
@@ -426,6 +425,14 @@
self.assertEquals(docinfo.root_name, 'html')
self.assertEquals(docinfo.doctype, '')
+ def test_parse_fileobject_crlf(self):
+ # libxml2 <= 2.6.23 has a bug reading CRLF files in chunks
+ xml = '' + 'test\r\n \r\n' * 10000 + ' '
+ f = SillyFileLike(xml)
+ root = self.etree.parse(f).getroot()
+ self.assertEquals(self.etree.tostring(root).replace('\r', ''),
+ xml.replace('\r', ''))
+
def _writeElement(self, element, encoding='us-ascii'):
"""Write out element for comparison.
"""
Modified: lxml/trunk/src/lxml/tests/test_io.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_io.py (original)
+++ lxml/trunk/src/lxml/tests/test_io.py Fri May 5 10:59:58 2006
@@ -85,14 +85,6 @@
root = self.etree.ElementTree().parse(f)
self.assert_(root.tag.endswith('foo'))
- def test_class_parse_fileobject_crlf(self):
- # libxml2 <= 2.6.23 has a bug reading CRLF files in chunks
- xml = '' + 'test\r\n ' * 100 + ' '
- f = SillyFileLike(xml)
- root = self.etree.ElementTree().parse(f)
- self.assertEquals(self.etree.tostring(root).replace('\r', ''),
- xml.replace('\r', ''))
-
def test_module_parse_large_fileobject(self):
# parse from unamed file object
f = LargeFileLike()
From scoder at codespeak.net Fri May 5 11:13:26 2006
From: scoder at codespeak.net (scoder@codespeak.net)
Date: Fri May 5 11:13:27 2006
Subject: [Lxml-checkins] r26801 - lxml/trunk/src/lxml
Message-ID: <20060505091326.09F0F10088@code0.codespeak.net>
Author: scoder
Date: Fri May 5 11:13:25 2006
New Revision: 26801
Modified:
lxml/trunk/src/lxml/apihelpers.pxi
Log:
make _stripDeclaration a little more tolerant
Modified: lxml/trunk/src/lxml/apihelpers.pxi
==============================================================================
--- lxml/trunk/src/lxml/apihelpers.pxi (original)
+++ lxml/trunk/src/lxml/apihelpers.pxi Fri May 5 11:13:25 2006
@@ -122,13 +122,15 @@
tree.xmlNodeDumpOutput(c_buffer, c_doc, c_next, 0, 0, encoding)
cdef object _stripDeclaration(object xml_string):
+ # this is a hack to remove the XML declaration when we encode to UTF-8
xml_string = xml_string.strip()
if xml_string[:5] == '<?xml':
i = xml_string.find('?>')
if i != -1:
- if xml_string[i+2:i+3] == '\n':
+ i = i + 2
+ while xml_string[i:i+1] in '\n\r ':
i = i+1
- xml_string = xml_string[i + 2:]
+ xml_string = xml_string[i:]
return xml_string
cdef _collectText(xmlNode* c_node):
From scoder at codespeak.net Fri May 5 11:42:31 2006
From: scoder at codespeak.net (scoder@codespeak.net)
Date: Fri May 5 11:42:42 2006
Subject: [Lxml-checkins] r26802 - lxml/trunk/src/lxml/tests
Message-ID: <20060505094231.2AA8210088@code0.codespeak.net>
Author: scoder
Date: Fri May 5 11:42:29 2006
New Revision: 26802
Modified:
lxml/trunk/src/lxml/tests/common_imports.py
Log:
fix LargeFileLike in common_imports (was losing content)
Modified: lxml/trunk/src/lxml/tests/common_imports.py
==============================================================================
--- lxml/trunk/src/lxml/tests/common_imports.py (original)
+++ lxml/trunk/src/lxml/tests/common_imports.py Fri May 5 11:42:29 2006
@@ -64,12 +64,11 @@
for element in self.more:
append(element)
result = data.getvalue()
+ data.seek(0)
+ data.truncate()
if amount:
- self.data = StringIO(result[amount:])
+ self.data.write(result[amount:])
result = result[:amount]
- else:
- data.seek(0)
- data.truncate()
return result
def fileInTestDir(name):
From scoder at codespeak.net Fri May 5 12:21:41 2006
From: scoder at codespeak.net (scoder@codespeak.net)
Date: Fri May 5 12:21:43 2006
Subject: [Lxml-checkins] r26803 - lxml/trunk/src/lxml/tests
Message-ID: <20060505102141.519E510090@code0.codespeak.net>
Author: scoder
Date: Fri May 5 12:21:40 2006
New Revision: 26803
Modified:
lxml/trunk/src/lxml/tests/common_imports.py
Log:
reduce size of large_file test case
Modified: lxml/trunk/src/lxml/tests/common_imports.py
==============================================================================
--- lxml/trunk/src/lxml/tests/common_imports.py (original)
+++ lxml/trunk/src/lxml/tests/common_imports.py Fri May 5 12:21:40 2006
@@ -34,7 +34,7 @@
return ''
class LargeFileLike:
- def __init__(self, charlen=100, depth=4, children=10):
+ def __init__(self, charlen=100, depth=4, children=5):
self.data = StringIO()
self.chars = 'a' * charlen
self.children = range(children)
From scoder at codespeak.net Fri May 5 12:37:14 2006
From: scoder at codespeak.net (scoder@codespeak.net)
Date: Fri May 5 12:37:16 2006
Subject: [Lxml-checkins] r26805 - in lxml/trunk: . src/lxml
Message-ID: <20060505103714.8869310090@code0.codespeak.net>
Author: scoder
Date: Fri May 5 12:37:12 2006
New Revision: 26805
Modified:
lxml/trunk/CHANGES.txt
lxml/trunk/src/lxml/parser.pxi
Log:
re-enable chunk parsing on older libxml2 versions: new CRLF bug work-around in read loop, new chunk_size keyword for XMLParser
Modified: lxml/trunk/CHANGES.txt
==============================================================================
--- lxml/trunk/CHANGES.txt (original)
+++ lxml/trunk/CHANGES.txt Fri May 5 12:37:12 2006
@@ -7,9 +7,7 @@
Features added
--------------
-* Parsing file-like objects now reads chunks rather than the whole file at
- once (only on libxml2 >= 2.6.24, older versions have a bug with CRLF line
- endings)
+* Parsing file-like objects now reads chunks rather than the whole file
* Parsing StringIO objects from the start avoids copying the string
Modified: lxml/trunk/src/lxml/parser.pxi
==============================================================================
--- lxml/trunk/src/lxml/parser.pxi (original)
+++ lxml/trunk/src/lxml/parser.pxi Fri May 5 12:37:12 2006
@@ -208,14 +208,14 @@
Note that you must not share parsers between threads.
"""
cdef int _parse_options
+ cdef object _chunk_size
cdef xmlParserCtxt* _file_parser_ctxt
cdef xmlParserCtxt* _memory_parser_ctxt
cdef xmlParserCtxt* _push_parser_ctxt
def __init__(self, attribute_defaults=False, dtd_validation=False,
load_dtd=False, no_network=False, ns_clean=False,
- recover=False):
+ recover=False, chunk_size=__FILE_READ_CHUNK_SIZE):
cdef int parse_options
- self._file_parser_ctxt = NULL
BaseParser.__init__(self)
parse_options = _XML_DEFAULT_PARSE_OPTIONS
@@ -235,6 +235,7 @@
parse_options = parse_options | xmlparser.XML_PARSE_RECOVER
self._parse_options = parse_options
+ self._chunk_size = int(chunk_size)
def __dealloc__(self):
if self._file_parser_ctxt != NULL:
@@ -313,18 +314,18 @@
try:
read = filelike.read
- data = read(__FILE_READ_CHUNK_SIZE)
+ data = read(self._chunk_size)
if python.PyUnicode_Check(data):
- data = _stripDeclaration(_utf8(data))
+ data = _stripDeclaration(data)
+ data = _utf8(data)
while data:
- if python.PyUnicode_Check(data):
- data = _utf8(data)
- elif not python.PyString_Check(data):
- raise TypeError, "File-like objects must return string or unicode"
+ if _LIBXML_VERSION_INT < 20624:
+ # CRLF reading bug in libxml2 <= 2.6.23
+ data = data.replace('\r\n', '\n')
success = xmlparser.xmlParseChunk(pctxt, _cstr(data), len(data), 0)
if success != 0:
return _handleParseResult(pctxt, NULL, c_filename, 0)
- data = read(__FILE_READ_CHUNK_SIZE)
+ data = _utf8( read(self._chunk_size) )
xmlparser.xmlParseChunk(pctxt, NULL, 0, 1)
except Exception:
if pctxt.myDoc is not NULL:
@@ -596,9 +597,5 @@
cdef xmlDoc* c_doc
if url is not None:
url = _utf8(url)
- # CRLF reading bug in libxml2 <= 2.6.23
- if _LIBXML_VERSION_INT >= 20624:
- c_doc = _parseDocFromFilelike(source, url, parser)
- return _documentFactory(c_doc, parser)
- else:
- return _parseMemoryDocument(source.read(), url, parser)
+ c_doc = _parseDocFromFilelike(source, url, parser)
+ return _documentFactory(c_doc, parser)
From scoder at codespeak.net Fri May 5 12:41:31 2006
From: scoder at codespeak.net (scoder@codespeak.net)
Date: Fri May 5 12:41:33 2006
Subject: [Lxml-checkins] r26806 - lxml/trunk/src/lxml/tests
Message-ID: <20060505104131.006A510090@code0.codespeak.net>
Author: scoder
Date: Fri May 5 12:41:30 2006
New Revision: 26806
Modified:
lxml/trunk/src/lxml/tests/test_etree.py
Log:
increase chance that fileobject_crlf test case captures the bug
Modified: lxml/trunk/src/lxml/tests/test_etree.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_etree.py (original)
+++ lxml/trunk/src/lxml/tests/test_etree.py Fri May 5 12:41:30 2006
@@ -426,11 +426,13 @@
self.assertEquals(docinfo.doctype, '')
def test_parse_fileobject_crlf(self):
- # libxml2 <= 2.6.23 has a bug reading CRLF files in chunks
- xml = '' + 'test\r\n \r\n' * 10000 + ' '
+ # libxml2 < 2.6.23 has a bug reading CRLF files in chunks
+ etree = self.etree
+ parser = etree.XMLParser(chunk_size=3)
+ xml = '' + '\r\ntest\r\n \r\n' * 10 + ' '
f = SillyFileLike(xml)
- root = self.etree.parse(f).getroot()
- self.assertEquals(self.etree.tostring(root).replace('\r', ''),
+ root = etree.parse(f, parser).getroot()
+ self.assertEquals(etree.tostring(root).replace('\r', ''),
xml.replace('\r', ''))
def _writeElement(self, element, encoding='us-ascii'):
From scoder at codespeak.net Fri May 5 12:43:16 2006
From: scoder at codespeak.net (scoder@codespeak.net)
Date: Fri May 5 12:43:18 2006
Subject: [Lxml-checkins] r26807 - lxml/trunk/src/lxml
Message-ID: <20060505104316.DDE9E10090@code0.codespeak.net>
Author: scoder
Date: Fri May 5 12:43:15 2006
New Revision: 26807
Modified:
lxml/trunk/src/lxml/parser.pxi
Log:
do not apply CRLF bug fix to libxml2 2.6.23, only older versions
Modified: lxml/trunk/src/lxml/parser.pxi
==============================================================================
--- lxml/trunk/src/lxml/parser.pxi (original)
+++ lxml/trunk/src/lxml/parser.pxi Fri May 5 12:43:15 2006
@@ -319,8 +319,8 @@
data = _stripDeclaration(data)
data = _utf8(data)
while data:
- if _LIBXML_VERSION_INT < 20624:
- # CRLF reading bug in libxml2 <= 2.6.23
+ if _LIBXML_VERSION_INT <= 20622:
+ # CRLF reading bug in libxml2 <= 2.6.22
data = data.replace('\r\n', '\n')
success = xmlparser.xmlParseChunk(pctxt, _cstr(data), len(data), 0)
if success != 0:
From scoder at codespeak.net Fri May 5 12:47:24 2006
From: scoder at codespeak.net (scoder@codespeak.net)
Date: Fri May 5 12:47:25 2006
Subject: [Lxml-checkins] r26808 - lxml/trunk/src/lxml
Message-ID: <20060505104724.7868710090@code0.codespeak.net>
Author: scoder
Date: Fri May 5 12:47:15 2006
New Revision: 26808
Modified:
lxml/trunk/src/lxml/parser.pxi
Log:
clean up in XMLParser.__init__
Modified: lxml/trunk/src/lxml/parser.pxi
==============================================================================
--- lxml/trunk/src/lxml/parser.pxi (original)
+++ lxml/trunk/src/lxml/parser.pxi Fri May 5 12:47:15 2006
@@ -202,7 +202,7 @@
major run-time overhead.
The keyword arguments in the constructor are mainly based on the libxml2
- parser configuration. A DTD will only be loaded if validation or
+ parser configuration. A DTD will also be loaded if validation or
attribute default values are requested.
Note that you must not share parsers between threads.
@@ -222,11 +222,11 @@
if load_dtd:
parse_options = parse_options | xmlparser.XML_PARSE_DTDLOAD
if dtd_validation:
- parse_options = parse_options | xmlparser.XML_PARSE_DTDLOAD | \
- xmlparser.XML_PARSE_DTDVALID
+ parse_options = parse_options | xmlparser.XML_PARSE_DTDVALID | \
+ xmlparser.XML_PARSE_DTDLOAD
if attribute_defaults:
- parse_options = parse_options | xmlparser.XML_PARSE_DTDLOAD | \
- xmlparser.XML_PARSE_DTDATTR
+ parse_options = parse_options | xmlparser.XML_PARSE_DTDATTR | \
+ xmlparser.XML_PARSE_DTDLOAD
if no_network:
parse_options = parse_options | xmlparser.XML_PARSE_NONET
if ns_clean:
From scoder at codespeak.net Fri May 5 13:33:48 2006
From: scoder at codespeak.net (scoder@codespeak.net)
Date: Fri May 5 13:33:49 2006
Subject: [Lxml-checkins] r26810 - lxml/trunk/src/lxml
Message-ID: <20060505113348.E638E10092@code0.codespeak.net>
Author: scoder
Date: Fri May 5 13:33:48 2006
New Revision: 26810
Modified:
lxml/trunk/src/lxml/parser.pxi
Log:
doc updates
Modified: lxml/trunk/src/lxml/parser.pxi
==============================================================================
--- lxml/trunk/src/lxml/parser.pxi (original)
+++ lxml/trunk/src/lxml/parser.pxi Fri May 5 13:33:48 2006
@@ -205,7 +205,17 @@
parser configuration. A DTD will also be loaded if validation or
attribute default values are requested.
- Note that you must not share parsers between threads.
+ Available keyword arguments:
+ * attribute_defaults - read default attributes from DTD
+ * dtd_validation - validate (if DTD is available)
+ * load_dtd - use DTD for parsing
+ * no_network - prevent network access
+ * ns_clean - clean up redundant namespace declarations
+ * recover - try hard to parse through broken XML
+ * chunk_size - read this many bytes from file-like objects
+
+ Note that you must not share parsers between threads. This applies also
+ to the default parser.
"""
cdef int _parse_options
cdef object _chunk_size
@@ -216,6 +226,9 @@
load_dtd=False, no_network=False, ns_clean=False,
recover=False, chunk_size=__FILE_READ_CHUNK_SIZE):
cdef int parse_options
+ self._memory_parser_ctxt = NULL
+ self._file_parser_ctxt = NULL
+ self._push_parser_ctxt = NULL
BaseParser.__init__(self)
parse_options = _XML_DEFAULT_PARSE_OPTIONS
@@ -246,6 +259,7 @@
xmlparser.xmlFreeParserCtxt(self._push_parser_ctxt)
def copy(self):
+ "Create a new parser with the same configuration."
cdef XMLParser parser
parser = self._copy()
parser._parse_options = self._parse_options
@@ -423,6 +437,11 @@
tree. By default, it can read broken (non well-formed) HTML, depending on
the capabilities of libxml2. Use the 'recover' option to switch this off.
+ Available keyword arguments:
+ * recover - try hard to parse through broken HTML (default: True)
+ * no_network - prevent network access
+ * remove_blank_text - clean up empty text nodes
+
Note that you must not share parsers between threads.
"""
cdef int _parse_options
@@ -453,6 +472,7 @@
htmlparser.htmlFreeParserCtxt(self._memory_parser_ctxt)
def copy(self):
+ "Create a new parser with the same configuration."
cdef HTMLParser parser
parser = self._copy()
parser._parse_options = self._parse_options
From scoder at codespeak.net Fri May 5 13:36:02 2006
From: scoder at codespeak.net (scoder@codespeak.net)
Date: Fri May 5 13:36:03 2006
Subject: [Lxml-checkins] r26811 - lxml/trunk/src/lxml
Message-ID: <20060505113602.92B8310092@code0.codespeak.net>
Author: scoder
Date: Fri May 5 13:36:01 2006
New Revision: 26811
Modified:
lxml/trunk/src/lxml/relaxng.pxi
lxml/trunk/src/lxml/xmlschema.pxi
Log:
valgrind fixes
Modified: lxml/trunk/src/lxml/relaxng.pxi
==============================================================================
--- lxml/trunk/src/lxml/relaxng.pxi (original)
+++ lxml/trunk/src/lxml/relaxng.pxi Fri May 5 13:36:01 2006
@@ -24,7 +24,7 @@
cdef xmlNode* c_node
cdef xmlDoc* fake_c_doc
cdef relaxng.xmlRelaxNGParserCtxt* parser_ctxt
-
+ self._c_schema = NULL
fake_c_doc = NULL
if etree is not None:
doc = _documentOrRaise(etree)
Modified: lxml/trunk/src/lxml/xmlschema.pxi
==============================================================================
--- lxml/trunk/src/lxml/xmlschema.pxi (original)
+++ lxml/trunk/src/lxml/xmlschema.pxi Fri May 5 13:36:01 2006
@@ -23,7 +23,7 @@
cdef xmlDoc* fake_c_doc
cdef xmlNode* c_node
cdef xmlschema.xmlSchemaParserCtxt* parser_ctxt
-
+ self._c_schema = NULL
if etree is not None:
doc = _documentOrRaise(etree)
root_node = _rootNodeOf(etree)
From scoder at codespeak.net Fri May 5 15:56:08 2006
From: scoder at codespeak.net (scoder@codespeak.net)
Date: Fri May 5 15:56:10 2006
Subject: [Lxml-checkins] r26814 - lxml/trunk/src/lxml
Message-ID: <20060505135608.B875E10092@code0.codespeak.net>
Author: scoder
Date: Fri May 5 15:56:07 2006
New Revision: 26814
Modified:
lxml/trunk/src/lxml/extensions.pxi
Log:
cleanup in _createNodeSetResult
Modified: lxml/trunk/src/lxml/extensions.pxi
==============================================================================
--- lxml/trunk/src/lxml/extensions.pxi (original)
+++ lxml/trunk/src/lxml/extensions.pxi Fri May 5 15:56:07 2006
@@ -248,7 +248,6 @@
cdef object _createNodeSetResult(xpath.xmlXPathObject* xpathObj, _Document doc):
cdef xmlNode* c_node
cdef char* s
- cdef _NodeBase element
cdef int i
result = []
if xpathObj.nodesetval is NULL:
@@ -262,18 +261,17 @@
# -> we store Python refs to these, so that is OK
# XSLT: can it leak when merging trees from multiple sources?
c_node = tree.xmlDocCopyNode(c_node, doc._c_doc, 1)
- element = _elementFactory(doc, c_node)
- result.append(element)
+ value = _elementFactory(doc, c_node)
elif c_node.type == tree.XML_TEXT_NODE:
- result.append(funicode(c_node.content))
+ value = funicode(c_node.content)
elif c_node.type == tree.XML_ATTRIBUTE_NODE:
s = tree.xmlNodeGetContent(c_node)
- attr_value = funicode(s)
+ value = funicode(s)
tree.xmlFree(s)
- result.append(attr_value)
else:
print "Not yet implemented result node type:", c_node.type
raise NotImplementedError
+ python.PyList_Append(result, value)
return result
cdef void _freeXPathObject(xpath.xmlXPathObject* xpathObj):
From scoder at codespeak.net Fri May 5 15:56:40 2006
From: scoder at codespeak.net (scoder@codespeak.net)
Date: Fri May 5 15:56:41 2006
Subject: [Lxml-checkins] r26815 - lxml/trunk/src/lxml/tests
Message-ID: <20060505135640.7CFB310092@code0.codespeak.net>
Author: scoder
Date: Fri May 5 15:56:39 2006
New Revision: 26815
Modified:
lxml/trunk/src/lxml/tests/test_xpathevaluator.py
Log:
new test case for node sets as XPath variable values
Modified: lxml/trunk/src/lxml/tests/test_xpathevaluator.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_xpathevaluator.py (original)
+++ lxml/trunk/src/lxml/tests/test_xpathevaluator.py Fri May 5 15:56:39 2006
@@ -182,6 +182,18 @@
self.assertEquals(1, len(r))
self.assertEquals("true", r[0].get('attr'))
+ def test_xpath_variables_nodeset(self):
+ x = self.parse(' ')
+ e = etree.XPathEvaluator(x)
+
+ element = etree.Element("test-el")
+ etree.SubElement(element, "test-sub")
+ expr = "$value"
+ r = e.evaluate(expr, value=element)
+ self.assertEquals(1, len(r))
+ self.assertEquals(element.tag, r[0].tag)
+ self.assertEquals(element[0].tag, r[0][0].tag)
+
def test_xpath_extensions_mix(self):
x = self.parse(' ')
From scoder at codespeak.net Fri May 5 15:59:50 2006
From: scoder at codespeak.net (scoder@codespeak.net)
Date: Fri May 5 15:59:51 2006
Subject: [Lxml-checkins] r26816 - lxml/trunk/src/lxml
Message-ID: <20060505135950.848AC10092@code0.codespeak.net>
Author: scoder
Date: Fri May 5 15:59:49 2006
New Revision: 26816
Modified:
lxml/trunk/src/lxml/xpath.pxd
lxml/trunk/src/lxml/xpath.pxi
Log:
cleanup in XPathContext, free variables with xmlXPathRegisteredVariablesCleanup, exceptions consistently raised in base class XPathEvaluatorBase
Modified: lxml/trunk/src/lxml/xpath.pxd
==============================================================================
--- lxml/trunk/src/lxml/xpath.pxd (original)
+++ lxml/trunk/src/lxml/xpath.pxd Fri May 5 15:59:49 2006
@@ -109,8 +109,7 @@
char* name,
char* ns_uri,
xmlXPathObject* value)
- cdef xmlXPathObject* xmlXPathVariableLookup(xmlXPathContext* ctxt,
- char* name)
+ cdef void xmlXPathRegisteredVariablesCleanup(xmlXPathContext *ctxt)
cdef xmlXPathObject* valuePop (xmlXPathParserContext *ctxt)
cdef int valuePush(xmlXPathParserContext* ctxt, xmlXPathObject *value)
Modified: lxml/trunk/src/lxml/xpath.pxi
==============================================================================
--- lxml/trunk/src/lxml/xpath.pxi (original)
+++ lxml/trunk/src/lxml/xpath.pxi Fri May 5 15:59:49 2006
@@ -11,11 +11,9 @@
cdef class _XPathContext(_BaseContext):
cdef object _variables
- cdef object _registered_variables
def __init__(self, namespaces, extensions, variables):
self._ext_lookup_function = _function_check
self._variables = variables
- self._registered_variables = []
_BaseContext.__init__(self, namespaces, extensions)
cdef register_context(self, xpath.xmlXPathContext* xpathCtxt, _Document doc):
@@ -32,32 +30,15 @@
xpathCtxt = self._xpathCtxt
if xpathCtxt is NULL:
return
- self._unregisterVariables()
- del self._registered_variables[:]
+ xpath.xmlXPathRegisteredVariablesCleanup(xpathCtxt)
self._unregister_context()
- cdef void _unregisterVariables(self):
- cdef xpath.xmlXPathContext* xpathCtxt
- cdef xpath.xmlXPathObject* xpathVarValue
- cdef char* c_name
- xpathCtxt = self._xpathCtxt
- for name_utf in self._registered_variables:
- c_name = _cstr(name_utf)
- xpathVarValue = xpath.xmlXPathVariableLookup(xpathCtxt, c_name)
- if xpathVarValue is not NULL:
- xpath.xmlXPathRegisterVariable(xpathCtxt, c_name, NULL)
- _freeXPathObject(xpathVarValue)
-
def registerVariables(self, variable_dict):
for name, value in variable_dict.items():
- name_utf = self._to_utf(name)
- self._registerVariable(name_utf, value)
- python.PyList_Append(self._registered_variables, name_utf)
+ self._registerVariable(self._to_utf(name), value)
def registerVariable(self, name, value):
- name_utf = self._to_utf(name)
- self._registerVariable(name_utf, value)
- python.PyList_Append(self._registered_variables, name_utf)
+ self._registerVariable(self._to_utf(name), value)
cdef void _registerVariable(self, name_utf, value):
xpath.xmlXPathRegisterVariable(
@@ -65,11 +46,24 @@
cdef class XPathEvaluatorBase:
+ cdef xpath.xmlXPathContext* _xpathCtxt
cdef _XPathContext _context
def __init__(self, namespaces, extensions, variables=None):
self._context = _XPathContext(namespaces, extensions, variables)
+ def __dealloc__(self):
+ if self._xpathCtxt is not NULL:
+ xpath.xmlXPathFreeContext(self._xpathCtxt)
+
+ cdef _raise_parse_error(self):
+ if self._xpathCtxt is not NULL and \
+ self._xpathCtxt.lastError.message is not NULL:
+ message = funicode(self._xpathCtxt.lastError.message)
+ else:
+ message = "Error in xpath expression."
+ raise XPathSyntaxError, message
+
cdef object _handle_result(self, xpath.xmlXPathObject* xpathObj, _Document doc):
if self._context._exc._has_raised():
if xpathObj is not NULL:
@@ -80,17 +74,14 @@
if xpathObj is NULL:
self._context._release_temp_refs()
- raise XPathSyntaxError, "Error in xpath expression."
+ self._raise_parse_error()
try:
result = _unwrapXPathObject(xpathObj, doc)
- except XPathResultError:
+ finally:
_freeXPathObject(xpathObj)
self._context._release_temp_refs()
- raise
- _freeXPathObject(xpathObj)
- self._context._release_temp_refs()
return result
@@ -99,7 +90,6 @@
XPath evaluators must not be shared between threads.
"""
- cdef xpath.xmlXPathContext* _c_ctxt
cdef _Element _element
def __init__(self, _NodeBase element not None, namespaces=None, extensions=None):
cdef xpath.xmlXPathContext* xpathCtxt
@@ -107,16 +97,12 @@
cdef _Document doc
doc = element._doc
xpathCtxt = xpath.xmlXPathNewContext(doc._c_doc)
+ self._xpathCtxt = xpathCtxt
if xpathCtxt is NULL:
raise XPathContextError, "Unable to create new XPath context"
self._element = element
- self._c_ctxt = xpathCtxt
XPathEvaluatorBase.__init__(self, namespaces, extensions)
- def __dealloc__(self):
- if self._c_ctxt is not NULL:
- xpath.xmlXPathFreeContext(self._c_ctxt)
-
def registerNamespace(self, prefix, uri):
"""Register a namespace with the XPath context.
"""
@@ -137,7 +123,7 @@
cdef xpath.xmlXPathObject* xpathObj
cdef xmlNode* c_node
cdef _Document doc
- xpathCtxt = self._c_ctxt
+ xpathCtxt = self._xpathCtxt
xpathCtxt.node = self._element._c_node
doc = self._element._doc
@@ -173,9 +159,7 @@
cdef class XPath(XPathEvaluatorBase):
- cdef xpath.xmlXPathContext* _xpathCtxt
cdef xpath.xmlXPathCompExpr* _xpath
- cdef object _prefix_map
cdef readonly object path
def __init__(self, path, namespaces=None, extensions=None):
@@ -184,7 +168,7 @@
path = _utf8(path)
self._xpath = xpath.xmlXPathCompile(_cstr(path))
if self._xpath is NULL:
- raise XPathSyntaxError, "Error in XPath expression"
+ self._raise_parse_error()
self._xpathCtxt = xpath.xmlXPathNewContext(NULL)
def __call__(self, _etree_or_element, **_variables):
@@ -214,8 +198,6 @@
return self(_tree, **_variables)
def __dealloc__(self):
- if self._xpathCtxt is not NULL:
- xpath.xmlXPathFreeContext(self._xpathCtxt)
if self._xpath is not NULL:
xpath.xmlXPathFreeCompExpr(self._xpath)
From scoder at codespeak.net Fri May 5 19:27:15 2006
From: scoder at codespeak.net (scoder@codespeak.net)
Date: Fri May 5 19:27:16 2006
Subject: [Lxml-checkins] r26821 - in lxml/trunk: doc src/lxml src/lxml/tests
Message-ID: <20060505172715.42D7A10092@code0.codespeak.net>
Author: scoder
Date: Fri May 5 19:27:10 2006
New Revision: 26821
Modified:
lxml/trunk/doc/extensions.txt
lxml/trunk/src/lxml/extensions.pxi
lxml/trunk/src/lxml/python.pxd
lxml/trunk/src/lxml/tests/test_xslt.py
lxml/trunk/src/lxml/xpath.pxd
lxml/trunk/src/lxml/xslt.pxi
Log:
fix and describe (doctests) API for evaluator-local extension functions
Modified: lxml/trunk/doc/extensions.txt
==============================================================================
--- lxml/trunk/doc/extensions.txt (original)
+++ lxml/trunk/doc/extensions.txt Fri May 5 19:27:10 2006
@@ -144,6 +144,85 @@
XPathSyntaxError: Error in xpath expression.
+Evaluator-local extensions
+--------------------------
+
+Apart from the global registration of extension functions, there is also a way
+of making extensions known to a single Evaluator or XSLT. All evaluators and
+the XSLT object accept a keyword argument ``extensions`` in their constructor.
+The value is a dictionary mapping (namespace, name) tuples to functions::
+
+ >>> extensions = {('local-ns', 'local-hello') : hello}
+ >>> namespaces = {'l' : 'local-ns'}
+
+ >>> e = etree.XPathEvaluator(doc, namespaces=namespaces, extensions=extensions)
+ >>> print e.evaluate('l:local-hello(string(b))')
+ Hello Haegar
+
+For larger numbers of extension functions, you can define classes or modules
+and use the ``Extension`` helper::
+
+ >>> class MyExt:
+ ... def function1(self, _, arg):
+ ... return '1'+arg
+ ... def function2(self, _, arg):
+ ... return '2'+arg
+ ... def function3(self, _, arg):
+ ... return '3'+arg
+
+ >>> ext_module = MyExt()
+ >>> functions = ('function1', 'function2')
+ >>> extensions = etree.Extension( ext_module, functions, 'local-ns' )
+
+ >>> e = etree.XPathEvaluator(doc, namespaces=namespaces, extensions=extensions)
+ >>> print e.evaluate('l:function1(string(b))')
+ 1Haegar
+
+The second argument to ``Extension`` can either be a sequence of names to
+select from the module, a dictionary that explicitly maps function names to
+their XPath alter-ego or ``None`` (explicitly passed) to take all available
+functions under their original name (if their name does not start with '_').
+
+The third argument takes a namespace URI or ``None`` (also if left out) for
+the default namespace. The following examples will therefore all do the same
+thing::
+
+ >>> functions = ('function1', 'function2', 'function3')
+ >>> extensions = etree.Extension( ext_module, functions )
+ >>> e = etree.XPathEvaluator(doc, extensions=extensions)
+ >>> print e.evaluate('function1(function2(function3(string(b))))')
+ 123Haegar
+
+ >>> extensions = etree.Extension( ext_module, functions, None )
+ >>> e = etree.XPathEvaluator(doc, extensions=extensions)
+ >>> print e.evaluate('function1(function2(function3(string(b))))')
+ 123Haegar
+
+ >>> extensions = etree.Extension( ext_module, None )
+ >>> e = etree.XPathEvaluator(doc, extensions=extensions)
+ >>> print e.evaluate('function1(function2(function3(string(b))))')
+ 123Haegar
+
+ >>> functions = {
+ ... 'function1' : 'function1',
+ ... 'function2' : 'function2',
+ ... 'function3' : 'function3'
+ ... }
+ >>> extensions = etree.Extension( ext_module, functions )
+ >>> e = etree.XPathEvaluator(doc, extensions=extensions)
+ >>> print e.evaluate('function1(function2(function3(string(b))))')
+ 123Haegar
+
+For convenience, you can also pass a sequence of extensions::
+
+ >>> extensions1 = etree.Extension( ext_module, None )
+ >>> extensions2 = etree.Extension( ext_module, None, 'local-ns' )
+ >>> e = etree.XPathEvaluator(doc, extensions=[extensions1, extensions2],
+ ... namespaces=namespaces)
+ >>> print e.evaluate('function1(l:function2(function3(string(b))))')
+ 123Haegar
+
+
What to return from a function
------------------------------
Modified: lxml/trunk/src/lxml/extensions.pxi
==============================================================================
--- lxml/trunk/src/lxml/extensions.pxi (original)
+++ lxml/trunk/src/lxml/extensions.pxi Fri May 5 19:27:10 2006
@@ -34,17 +34,18 @@
self._function_cache = {}
self._called_function = None
- # convert old format extensions to UTF-8
- if isinstance(extensions, (list, tuple)):
+ if extensions is not None:
+ # convert extensions to UTF-8
+ if python.PyDict_Check(extensions):
+ extensions = (extensions,)
+ # format: [ {(ns,name):function} ] -> {(ns_utf,name_utf):function}
new_extensions = {}
for extension in extensions:
for (ns_uri, name), function in extension.items():
ns_utf = self._to_utf(ns_uri)
name_utf = self._to_utf(name)
- try:
- new_extensions[ns_utf][name_utf] = function
- except KeyError:
- new_extensions[ns_utf] = {name_utf : function}
+ python.PyDict_SetItem(
+ new_extensions, (ns_utf, name_utf), function)
extensions = new_extensions or None
self._doc = None
@@ -81,7 +82,7 @@
self._xpathCtxt, self._ext_lookup_function, self)
cdef _unregister_context(self):
- self._unregisterNamespaces()
+ xpath.xmlXPathRegisteredNsCleanup(self._xpathCtxt)
self._free_context()
cdef _free_context(self):
@@ -107,13 +108,6 @@
prefix_utf = self._to_utf(prefix)
ns_uri_utf = self._to_utf(ns_uri)
xpath.xmlXPathRegisterNs(self._xpathCtxt, prefix_utf, ns_uri_utf)
- python.PyList_Append(self._registered_namespaces, prefix_utf)
-
- cdef _unregisterNamespaces(self):
- cdef xpath.xmlXPathContext* xpathCtxt
- xpathCtxt = self._xpathCtxt
- for prefix_utf in self._registered_namespaces:
- xpath.xmlXPathRegisterNs(xpathCtxt, prefix_utf, NULL)
# extension functions
@@ -126,9 +120,8 @@
self._called_function = function
return function is not None
- dict_result = python.PyDict_GetItem(self._extensions, ns_uri_utf)
- if dict_result is not NULL:
- dict_result = python.PyDict_GetItem(dict_result, name_utf)
+ if self._extensions is not None:
+ dict_result = python.PyDict_GetItem(self._extensions, key)
if dict_result is not NULL:
function = dict_result
else:
@@ -165,11 +158,22 @@
self._temp_refs.add(element._doc)
-def Extension(module, function_mapping, ns_uri=None):
- functions = []
- for function_name, xpath_name in function_mapping.items():
- functions[xpath_name] = getattr(module, function_name)
- return {ns_uri : functions}
+def Extension(module, function_mapping, ns=None):
+ functions = {}
+ if python.PyDict_Check(function_mapping):
+ for function_name, xpath_name in function_mapping.items():
+ python.PyDict_SetItem(functions, (ns, xpath_name),
+ getattr(module, function_name))
+ else:
+ if function_mapping is None:
+ function_mapping = []
+ for name in dir(module):
+ if not name.startswith('_'):
+ python.PyList_Append(function_mapping, name)
+ for function_name in function_mapping:
+ python.PyDict_SetItem(functions, (ns, function_name),
+ getattr(module, function_name))
+ return functions
################################################################################
Modified: lxml/trunk/src/lxml/python.pxd
==============================================================================
--- lxml/trunk/src/lxml/python.pxd (original)
+++ lxml/trunk/src/lxml/python.pxd Fri May 5 19:27:10 2006
@@ -33,6 +33,7 @@
cdef object PySequence_Tuple(object o)
cdef object PyTuple_GET_ITEM(object o, int pos)
+ cdef int PyDict_Check(object instance)
cdef int PyNumber_Check(object instance)
cdef int PyBool_Check(object instance)
cdef int PySequence_Check(object instance)
Modified: lxml/trunk/src/lxml/tests/test_xslt.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_xslt.py (original)
+++ lxml/trunk/src/lxml/tests/test_xslt.py Fri May 5 19:27:10 2006
@@ -341,7 +341,7 @@
def mytext(ctxt, values):
return 'X' * len(values)
- result = tree.xslt(style, {'testns' : {'mytext' : mytext}})
+ result = tree.xslt(style, {('testns', 'mytext') : mytext})
self.assertEquals(self._rootstring(result),
'X ')
Modified: lxml/trunk/src/lxml/xpath.pxd
==============================================================================
--- lxml/trunk/src/lxml/xpath.pxd (original)
+++ lxml/trunk/src/lxml/xpath.pxd Fri May 5 19:27:10 2006
@@ -110,6 +110,7 @@
char* ns_uri,
xmlXPathObject* value)
cdef void xmlXPathRegisteredVariablesCleanup(xmlXPathContext *ctxt)
+ cdef void xmlXPathRegisteredNsCleanup(xmlXPathContext *ctxt)
cdef xmlXPathObject* valuePop (xmlXPathParserContext *ctxt)
cdef int valuePush(xmlXPathParserContext* ctxt, xmlXPathObject *value)
Modified: lxml/trunk/src/lxml/xslt.pxi
==============================================================================
--- lxml/trunk/src/lxml/xslt.pxi (original)
+++ lxml/trunk/src/lxml/xslt.pxi Fri May 5 19:27:10 2006
@@ -153,15 +153,9 @@
self._release_temp_refs()
cdef _registerLocalExtensionFunction(self, ns_utf, name_utf, function):
- extensions = self._extensions
- if extensions is None:
- self._extensions = {ns_utf:{name_utf:function}}
- else:
- if ns_utf in extensions:
- ns_extensions = extensions[ns_utf]
- else:
- ns_extensions = extensions[ns_utf] = {}
- python.PyDict_SetItem(ns_extensions, name_utf, function)
+ if self._extensions is None:
+ self._extensions = {}
+ python.PyDict_SetItem(self._extensions, (ns_utf, name_utf), function)
cdef class _ExsltRegExp # forward declaration
From scoder at codespeak.net Fri May 5 19:48:26 2006
From: scoder at codespeak.net (scoder@codespeak.net)
Date: Fri May 5 19:48:27 2006
Subject: [Lxml-checkins] r26824 - lxml/trunk/src/lxml
Message-ID: <20060505174826.86B9010094@code0.codespeak.net>
Author: scoder
Date: Fri May 5 19:48:23 2006
New Revision: 26824
Modified:
lxml/trunk/src/lxml/cstd.pxd
lxml/trunk/src/lxml/extensions.pxi
lxml/trunk/src/lxml/proxy.pxi
lxml/trunk/src/lxml/python.pxd
lxml/trunk/src/lxml/xpath.pxi
lxml/trunk/src/lxml/xslt.pxi
Log:
clean up, use Python memory management instead of plain C-malloc
Modified: lxml/trunk/src/lxml/cstd.pxd
==============================================================================
--- lxml/trunk/src/lxml/cstd.pxd (original)
+++ lxml/trunk/src/lxml/cstd.pxd Fri May 5 19:48:23 2006
@@ -1,8 +1,4 @@
-cdef extern from "stdlib.h":
- cdef void* malloc(int size)
- void free(void* ptr)
-
cdef extern from "stdarg.h":
ctypedef void *va_list
void va_start(va_list ap, void *last)
@@ -11,4 +7,3 @@
cdef extern from "etree.h":
cdef int va_int(va_list ap)
cdef char *va_charptr(va_list ap)
-
Modified: lxml/trunk/src/lxml/extensions.pxi
==============================================================================
--- lxml/trunk/src/lxml/extensions.pxi (original)
+++ lxml/trunk/src/lxml/extensions.pxi Fri May 5 19:48:23 2006
@@ -20,7 +20,6 @@
cdef _Document _doc
cdef object _extensions
cdef object _namespaces
- cdef object _registered_namespaces
cdef object _utf_refs
cdef object _function_cache
cdef object _called_function
@@ -52,7 +51,6 @@
self._exc = _ExceptionContext()
self._extensions = extensions
self._namespaces = namespaces
- self._registered_namespaces = []
self._temp_refs = _TempStore()
cdef object _to_utf(self, s):
@@ -71,7 +69,7 @@
self._xpathCtxt = xpathCtxt
xpathCtxt.userData = self
- cdef _register_context(self, _Document doc, int allow_none_namespace):
+ cdef _register_context(self, _Document doc):
self._doc = doc
self._exc.clear()
python.PyDict_Clear(self._function_cache)
@@ -86,7 +84,6 @@
self._free_context()
cdef _free_context(self):
- del self._registered_namespaces[:]
python.PyDict_Clear(self._utf_refs)
self._doc = None
if self._xpathCtxt is not NULL:
Modified: lxml/trunk/src/lxml/proxy.pxi
==============================================================================
--- lxml/trunk/src/lxml/proxy.pxi (original)
+++ lxml/trunk/src/lxml/proxy.pxi Fri May 5 19:48:23 2006
@@ -41,7 +41,7 @@
return
# XXX should we check whether we ran into proxy_type before?
#print "registering for:", proxy._c_node
- ref = cstd.malloc(sizeof(ProxyRef))
+ ref = python.PyMem_Malloc(sizeof(ProxyRef))
ref.proxy = proxy
ref.type = proxy_type
ref.next = c_node._private
@@ -59,7 +59,7 @@
ref = c_node._private
if ref.proxy == proxy_ref:
c_node._private = ref.next
- cstd.free(ref)
+ python.PyMem_Free(ref)
return
prev_ref = ref
#print "First registered is:", ref.type
@@ -68,7 +68,7 @@
#print "Registered is:", ref.type
if ref.proxy == proxy_ref:
prev_ref.next = ref.next
- cstd.free(ref)
+ python.PyMem_Free(ref)
return
prev_ref = ref
ref = ref.next
Modified: lxml/trunk/src/lxml/python.pxd
==============================================================================
--- lxml/trunk/src/lxml/python.pxd (original)
+++ lxml/trunk/src/lxml/python.pxd Fri May 5 19:48:23 2006
@@ -2,6 +2,7 @@
cdef extern from "Python.h":
ctypedef struct PyObject
+ ctypedef int size_t
cdef FILE* PyFile_AsFile(PyObject* p)
cdef int PyFile_Check(object p)
@@ -39,6 +40,9 @@
cdef int PySequence_Check(object instance)
cdef int PyType_Check(object instance)
+ cdef void* PyMem_Malloc(size_t size)
+ cdef void PyMem_Free(void* p)
+
cdef extern from "etree.h": # redefines some functions as macros
cdef int isinstance(object instance, object classes)
cdef int issubclass(object derived, object superclasses)
Modified: lxml/trunk/src/lxml/xpath.pxi
==============================================================================
--- lxml/trunk/src/lxml/xpath.pxi (original)
+++ lxml/trunk/src/lxml/xpath.pxi Fri May 5 19:48:23 2006
@@ -21,7 +21,7 @@
ns_prefixes = _find_all_extension_prefixes()
if ns_prefixes:
self.registerNamespaces(ns_prefixes)
- self._register_context(doc, 1)
+ self._register_context(doc)
if self._variables is not None:
self.registerVariables(self._variables)
Modified: lxml/trunk/src/lxml/xslt.pxi
==============================================================================
--- lxml/trunk/src/lxml/xslt.pxi (original)
+++ lxml/trunk/src/lxml/xslt.pxi Fri May 5 19:48:23 2006
@@ -139,7 +139,7 @@
_Document doc):
self._xsltCtxt = xsltCtxt
self._set_xpath_context(xsltCtxt.xpathCtxt)
- self._register_context(doc, 0)
+ self._register_context(doc)
xsltCtxt.xpathCtxt.userData = self
cdef free_context(self):
@@ -260,10 +260,10 @@
# allocate space for parameters
# * 2 as we want an entry for both key and value,
# and + 1 as array is NULL terminated
- params = cstd.malloc(sizeof(char*) * (len(_kw) * 2 + 1))
+ params = python.PyMem_Malloc(sizeof(char*) * (len(_kw) * 2 + 1))
i = 0
keep_ref = []
- for key, value in _kw.items():
+ for key, value in _kw.iteritems():
k = _utf8(key)
python.PyList_Append(keep_ref, k)
v = _utf8(value)
@@ -285,7 +285,7 @@
if params is not NULL:
# deallocate space for parameters
- cstd.free(params)
+ python.PyMem_Free(params)
self._context.free_context()
c_doc._private = ptemp # restore _private before _destroyFakeDoc!
From scoder at codespeak.net Sat May 6 10:07:33 2006
From: scoder at codespeak.net (scoder@codespeak.net)
Date: Sat May 6 10:07:34 2006
Subject: [Lxml-checkins] r26844 - lxml/trunk/src/lxml
Message-ID: <20060506080733.BE7C01007E@code0.codespeak.net>
Author: scoder
Date: Sat May 6 10:07:32 2006
New Revision: 26844
Modified:
lxml/trunk/src/lxml/xslt.pxi
Log:
cleanup
Modified: lxml/trunk/src/lxml/xslt.pxi
==============================================================================
--- lxml/trunk/src/lxml/xslt.pxi (original)
+++ lxml/trunk/src/lxml/xslt.pxi Sat May 6 10:07:32 2006
@@ -234,7 +234,7 @@
cdef xmlDoc* c_doc
cdef char** params
cdef void* ptemp
- cdef int i
+ cdef int i, kw_count
input_doc = _documentOrRaise(_input)
root_node = _rootNodeOf(_input)
@@ -256,11 +256,13 @@
ptemp = c_doc._private
c_doc._private = resolver_context
- if _kw:
+ kw_count = python.PyDict_Size(_kw)
+ if kw_count > 0:
# allocate space for parameters
# * 2 as we want an entry for both key and value,
# and + 1 as array is NULL terminated
- params = python.PyMem_Malloc(sizeof(char*) * (len(_kw) * 2 + 1))
+ params = python.PyMem_Malloc(
+ sizeof(char*) * (kw_count * 2 + 1))
i = 0
keep_ref = []
for key, value in _kw.iteritems():
From scoder at codespeak.net Sat May 6 10:54:27 2006
From: scoder at codespeak.net (scoder@codespeak.net)
Date: Sat May 6 10:54:28 2006
Subject: [Lxml-checkins] r26848 - lxml/trunk/src/lxml
Message-ID: <20060506085427.7AE0310082@code0.codespeak.net>
Author: scoder
Date: Sat May 6 10:54:25 2006
New Revision: 26848
Modified:
lxml/trunk/src/lxml/apihelpers.pxi
lxml/trunk/src/lxml/etree.h
lxml/trunk/src/lxml/etree.pyx
lxml/trunk/src/lxml/parser.pxi
lxml/trunk/src/lxml/python.pxd
lxml/trunk/src/lxml/xslt.pxi
Log:
support Py_ssize_t in Python 2.5 (compiling under Py2.5/64bit needs patched Pyrex)
Modified: lxml/trunk/src/lxml/apihelpers.pxi
==============================================================================
--- lxml/trunk/src/lxml/apihelpers.pxi (original)
+++ lxml/trunk/src/lxml/apihelpers.pxi Sat May 6 10:54:25 2006
@@ -140,7 +140,7 @@
If there was no text to collect, return None
"""
- cdef int scount
+ cdef Py_ssize_t scount
cdef char* text
cdef xmlNode* c_node_cur
# check for multiple text nodes
@@ -179,17 +179,17 @@
tree.xmlFreeNode(c_node)
c_node = c_next
-cdef xmlNode* _findChild(xmlNode* c_node, int index):
+cdef xmlNode* _findChild(xmlNode* c_node, Py_ssize_t index):
if index < 0:
return _findChildBackwards(c_node, -index - 1)
else:
return _findChildForwards(c_node, index)
-cdef xmlNode* _findChildForwards(xmlNode* c_node, int index):
+cdef xmlNode* _findChildForwards(xmlNode* c_node, Py_ssize_t index):
"""Return child element of c_node with index, or return NULL if not found.
"""
cdef xmlNode* c_child
- cdef int c
+ cdef Py_ssize_t c
c_child = c_node.children
c = 0
while c_child is not NULL:
@@ -201,12 +201,12 @@
else:
return NULL
-cdef xmlNode* _findChildBackwards(xmlNode* c_node, int index):
+cdef xmlNode* _findChildBackwards(xmlNode* c_node, Py_ssize_t index):
"""Return child element of c_node with index, or return NULL if not found.
Search from the end.
"""
cdef xmlNode* c_child
- cdef int c
+ cdef Py_ssize_t c
c_child = c_node.last
c = 0
while c_child is not NULL:
@@ -255,16 +255,11 @@
c_target = c_tail
c_tail = c_next
-### see etree.h:
-## cdef int _isElement(xmlNode* c_node):
-## return (c_node.type == tree.XML_ELEMENT_NODE or
-## c_node.type == tree.XML_COMMENT_NODE)
-
-cdef xmlNode* _deleteSlice(xmlNode* c_node, int start, int stop):
+cdef xmlNode* _deleteSlice(xmlNode* c_node, Py_ssize_t start, Py_ssize_t stop):
"""Delete slice, starting with c_node, start counting at start, end at stop.
"""
cdef xmlNode* c_next
- cdef int c
+ cdef Py_ssize_t c
if c_node is NULL:
return NULL
# now start deleting nodes
Modified: lxml/trunk/src/lxml/etree.h
==============================================================================
--- lxml/trunk/src/lxml/etree.h (original)
+++ lxml/trunk/src/lxml/etree.h Sat May 6 10:54:25 2006
@@ -1,6 +1,17 @@
#ifndef HAS_ETREE_H
#define HAS_ETREE_H
+/* Py_ssize_t support was added in Python 2.5 */
+#if PY_VERSION_HEX < 0x02050000
+#ifndef PY_SSIZE_T_MAX /* patched Pyrex? */
+ typedef int Py_ssize_t;
+ #define PY_SSIZE_T_MAX INT_MAX
+ #define PY_SSIZE_T_MIN INT_MIN
+ #define PyInt_FromSsize_t(z) PyInt_FromLong(z)
+ #define PyInt_AsSsize_t(o) PyInt_AsLong(o)
+#endif
+#endif
+
#define isinstance(o,c) PyObject_IsInstance(o,c)
#define issubclass(c,csuper) PyObject_IsSubclass(c,csuper)
#define hasattr(o,a) PyObject_HasAttr(o,a)
Modified: lxml/trunk/src/lxml/etree.pyx
==============================================================================
--- lxml/trunk/src/lxml/etree.pyx (original)
+++ lxml/trunk/src/lxml/etree.pyx Sat May 6 10:54:25 2006
@@ -1,7 +1,7 @@
cimport tree, python
from tree cimport xmlDoc, xmlNode, xmlAttr, xmlNs, _isElement
from python cimport isinstance, issubclass, hasattr, callable
-from python cimport iter, str, _cstr
+from python cimport iter, str, _cstr, Py_ssize_t
cimport xinclude
cimport c14n
cimport cstd
@@ -505,7 +505,7 @@
# MANIPULATORS
- def __setitem__(self, index, _NodeBase element):
+ def __setitem__(self, Py_ssize_t index, _NodeBase element):
cdef xmlNode* c_node
cdef xmlNode* c_next
cdef int foreign
@@ -519,7 +519,7 @@
_moveTail(c_next, element._c_node)
changeDocumentBelow(element, self._doc, foreign)
- def __delitem__(self, index):
+ def __delitem__(self, Py_ssize_t index):
cdef xmlNode* c_node
c_node = _findChild(self._c_node, index)
if c_node is NULL:
@@ -527,12 +527,12 @@
_removeText(c_node.next)
_removeNode(c_node)
- def __delslice__(self, start, stop):
+ def __delslice__(self, Py_ssize_t start, Py_ssize_t stop):
cdef xmlNode* c_node
c_node = _findChild(self._c_node, start)
_deleteSlice(c_node, start, stop)
- def __setslice__(self, start, stop, value):
+ def __setslice__(self, Py_ssize_t start, Py_ssize_t stop, value):
cdef xmlNode* c_node
cdef xmlNode* c_next
cdef _Element mynode
@@ -713,17 +713,17 @@
def __repr__(self):
return "<Element %s at %x>" % (self.tag, id(self))
- def __getitem__(self, index):
+ def __getitem__(self, Py_ssize_t index):
cdef xmlNode* c_node
c_node = _findChild(self._c_node, index)
if c_node is NULL:
raise IndexError, "list index out of range"
return _elementFactory(self._doc, c_node)
- def __getslice__(self, start, stop):
+ def __getslice__(self, Py_ssize_t start, Py_ssize_t stop):
cdef xmlNode* c_node
cdef _Document doc
- cdef int c, c_stop
+ cdef Py_ssize_t c
# this does not work for negative start, stop, however,
# python seems to convert these to positive start, stop before
# calling, so this all works perfectly (at the cost of a len() call)
@@ -731,10 +731,9 @@
if c_node is NULL:
return []
c = start
- c_stop = stop
result = []
doc = self._doc
- while c_node is not NULL and c < c_stop:
+ while c_node is not NULL and c < stop:
if _isElement(c_node):
ret = python.PyList_Append(result, _elementFactory(doc, c_node))
if ret:
@@ -744,7 +743,7 @@
return result
def __len__(self):
- cdef int c
+ cdef Py_ssize_t c
cdef xmlNode* c_node
c = 0
c_node = self._c_node.children
@@ -766,10 +765,8 @@
return ElementChildIterator(self, reversed=True)
def index(self, _Element x not None, start=None, stop=None):
- cdef int k
- cdef int l
- cdef int c_stop
- cdef int c_start
+ cdef Py_ssize_t k, l
+ cdef Py_ssize_t c_start, c_stop
cdef xmlNode* c_child
cdef xmlNode* c_start_node
c_child = x._c_node
@@ -830,7 +827,7 @@
return k
else:
return k
- if c_start or c_stop:
+ if c_start != 0 or c_stop != 0:
raise ValueError, "list.index(x): x not in slice"
else:
raise ValueError, "list.index(x): x not in list"
@@ -1053,7 +1050,7 @@
return result
def __len__(self):
- cdef int c
+ cdef Py_ssize_t c
cdef xmlNode* c_node
c = 0
c_node = <xmlNode*>(self._c_node.properties)
Modified: lxml/trunk/src/lxml/parser.pxi
==============================================================================
--- lxml/trunk/src/lxml/parser.pxi (original)
+++ lxml/trunk/src/lxml/parser.pxi Sat May 6 10:54:25 2006
@@ -483,7 +483,6 @@
"""
cdef xmlDoc* result
cdef xmlParserCtxt* pctxt
- cdef int c_len
cdef int recover
self._error_log.connect()
pctxt = self._memory_parser_ctxt
Modified: lxml/trunk/src/lxml/python.pxd
==============================================================================
--- lxml/trunk/src/lxml/python.pxd (original)
+++ lxml/trunk/src/lxml/python.pxd Sat May 6 10:54:25 2006
@@ -3,6 +3,7 @@
cdef extern from "Python.h":
ctypedef struct PyObject
ctypedef int size_t
+ ctypedef int Py_ssize_t
cdef FILE* PyFile_AsFile(PyObject* p)
cdef int PyFile_Check(object p)
@@ -13,14 +14,14 @@
cdef object PyUnicode_FromEncodedObject(object s, char* encoding,
char* errors)
- cdef object PyUnicode_DecodeUTF8(char* s, int size, char* errors)
+ cdef object PyUnicode_DecodeUTF8(char* s, Py_ssize_t size, char* errors)
cdef object PyUnicode_AsUTF8String(object ustring)
- cdef object PyString_FromStringAndSize(char* s, int size)
+ cdef object PyString_FromStringAndSize(char* s, Py_ssize_t size)
cdef object PyString_FromString(char* s)
cdef object PyString_FromFormat(char* format, ...)
cdef object PyBool_FromLong(long value)
- cdef int PyList_GET_SIZE(object l)
+ cdef Py_ssize_t PyList_GET_SIZE(object l)
cdef int PyList_Append(object l, object obj)
cdef int PyList_Reverse(object l)
cdef int PyDict_SetItemString(object d, char* key, object value)
@@ -29,10 +30,11 @@
cdef PyObject* PyDict_GetItem(object d, object key)
cdef int PyDict_DelItem(object d, object key)
cdef int PyDict_Clear(object d)
+ cdef Py_ssize_t PyDict_Size(object d)
cdef object PyList_AsTuple(object o)
cdef object PySequence_List(object o)
cdef object PySequence_Tuple(object o)
- cdef object PyTuple_GET_ITEM(object o, int pos)
+ cdef object PyTuple_GET_ITEM(object o, Py_ssize_t pos)
cdef int PyDict_Check(object instance)
cdef int PyNumber_Check(object instance)
Modified: lxml/trunk/src/lxml/xslt.pxi
==============================================================================
--- lxml/trunk/src/lxml/xslt.pxi (original)
+++ lxml/trunk/src/lxml/xslt.pxi Sat May 6 10:54:25 2006
@@ -234,7 +234,7 @@
cdef xmlDoc* c_doc
cdef char** params
cdef void* ptemp
- cdef int i, kw_count
+ cdef Py_ssize_t i, kw_count
input_doc = _documentOrRaise(_input)
root_node = _rootNodeOf(_input)
From scoder at codespeak.net Sat May 6 14:57:47 2006
From: scoder at codespeak.net (scoder@codespeak.net)
Date: Sat May 6 14:57:48 2006
Subject: [Lxml-checkins] r26870 - lxml/trunk/src/lxml
Message-ID: <20060506125747.05F211007E@code0.codespeak.net>
Author: scoder
Date: Sat May 6 14:57:46 2006
New Revision: 26870
Modified:
lxml/trunk/src/lxml/apihelpers.pxi
lxml/trunk/src/lxml/cstd.pxd
lxml/trunk/src/lxml/etree.pyx
lxml/trunk/src/lxml/relaxng.pxi
lxml/trunk/src/lxml/tree.pxd
lxml/trunk/src/lxml/xmlerror.pxi
lxml/trunk/src/lxml/xmlschema.pxi
lxml/trunk/src/lxml/xslt.pxi
Log:
cleanup: moved strstr/strcmp/etc. from tree.pxd to cstd.pxd
Modified: lxml/trunk/src/lxml/apihelpers.pxi
==============================================================================
--- lxml/trunk/src/lxml/apihelpers.pxi (original)
+++ lxml/trunk/src/lxml/apihelpers.pxi Sat May 6 14:57:46 2006
@@ -286,7 +286,7 @@
cdef object funicode(char* s):
if isutf8(s):
- return python.PyUnicode_DecodeUTF8(s, tree.strlen(s), NULL)
+ return python.PyUnicode_DecodeUTF8(s, cstd.strlen(s), NULL)
return python.PyString_FromString(s)
cdef object _utf8(object s):
Modified: lxml/trunk/src/lxml/cstd.pxd
==============================================================================
--- lxml/trunk/src/lxml/cstd.pxd (original)
+++ lxml/trunk/src/lxml/cstd.pxd Sat May 6 14:57:46 2006
@@ -1,4 +1,11 @@
+cdef extern from "stdio.h":
+ ctypedef struct FILE
+ cdef int strlen(char* s)
+ cdef char* strstr(char* haystack, char* needle)
+ cdef int strcmp(char* s1, char* s2)
+ cdef int strncmp(char* s1, char* s2, int len)
+
cdef extern from "stdarg.h":
ctypedef void *va_list
void va_start(va_list ap, void *last)
Modified: lxml/trunk/src/lxml/etree.pyx
==============================================================================
--- lxml/trunk/src/lxml/etree.pyx (original)
+++ lxml/trunk/src/lxml/etree.pyx Sat May 6 14:57:46 2006
@@ -1259,11 +1259,11 @@
return node
cdef int _tagMatches(self, xmlNode* c_node):
- if tree.strcmp(c_node.name, self._name) == 0:
+ if cstd.strcmp(c_node.name, self._name) == 0:
if c_node.ns == NULL or c_node.ns.href == NULL:
return self._href == NULL
else:
- return tree.strcmp(c_node.ns.href, self._href) == 0
+ return cstd.strcmp(c_node.ns.href, self._href) == 0
return 0
cdef xmlNode* _createElement(xmlDoc* c_doc, object name_utf,
Modified: lxml/trunk/src/lxml/relaxng.pxi
==============================================================================
--- lxml/trunk/src/lxml/relaxng.pxi (original)
+++ lxml/trunk/src/lxml/relaxng.pxi Sat May 6 14:57:46 2006
@@ -32,7 +32,7 @@
c_node = root_node._c_node
# work around for libxml2 bug if document is not RNG at all
if c_node.ns is NULL or c_node.ns.href is NULL or \
- tree.strcmp(c_node.ns.href,
+ cstd.strcmp(c_node.ns.href,
'http://relaxng.org/ns/structure/1.0') != 0:
raise RelaxNGParseError, "Document is not Relax NG"
fake_c_doc = _fakeRootDoc(doc._c_doc, root_node._c_node)
Modified: lxml/trunk/src/lxml/tree.pxd
==============================================================================
--- lxml/trunk/src/lxml/tree.pxd (original)
+++ lxml/trunk/src/lxml/tree.pxd Sat May 6 14:57:46 2006
@@ -1,11 +1,4 @@
-#from xmlparser cimport xmlDict
-
-cdef extern from "stdio.h":
- ctypedef struct FILE
- cdef int strlen(char* s)
- cdef char* strstr(char* haystack, char* needle)
- cdef int strcmp(char* s1, char* s2)
- cdef int strncmp(char* s1, char* s2, int len)
+from cstd cimport FILE
cdef extern from "lxml-version.h":
cdef char* LXML_VERSION_STRING
Modified: lxml/trunk/src/lxml/xmlerror.pxi
==============================================================================
--- lxml/trunk/src/lxml/xmlerror.pxi (original)
+++ lxml/trunk/src/lxml/xmlerror.pxi Sat May 6 14:57:46 2006
@@ -34,7 +34,7 @@
self.level = error.level
self.line = error.line
self.message = python.PyString_FromStringAndSize(
- error.message, tree.strlen(error.message) - 1) # strip EOL
+ error.message, cstd.strlen(error.message) - 1) # strip EOL
if error.file is NULL:
self.filename = ''
else:
@@ -259,7 +259,7 @@
cdef char* c_filename
cdef char* c_element
cdef int c_line
- if __DEBUG == 0 or msg == NULL or tree.strlen(msg) < 10:
+ if __DEBUG == 0 or msg == NULL or cstd.strlen(msg) < 10:
return
if c_log_handler is not NULL:
log_handler = <_ErrorLog>c_log_handler
@@ -267,19 +267,19 @@
log_handler = __GLOBAL_ERROR_LOG
cstd.va_start(args, msg)
- if tree.strncmp(msg, '%s:', 3) == 0:
+ if cstd.strncmp(msg, '%s:', 3) == 0:
c_text = cstd.va_charptr(args)
else:
c_text = NULL
- if tree.strstr(msg, 'file %s') is not NULL:
+ if cstd.strstr(msg, 'file %s') is not NULL:
c_filename = cstd.va_charptr(args)
else:
c_filename = NULL
- if tree.strstr(msg, 'line %d') is not NULL:
+ if cstd.strstr(msg, 'line %d') is not NULL:
c_line = cstd.va_int(args)
else:
c_line = -1
- if tree.strstr(msg, 'element %s') is not NULL:
+ if cstd.strstr(msg, 'element %s') is not NULL:
c_element = cstd.va_charptr(args)
else:
c_element = NULL
@@ -297,8 +297,8 @@
message = ""
try:
- if c_filename is not NULL and tree.strlen(c_filename) > 0:
- if tree.strncmp(c_filename, 'XSLT:', 5) == 0:
+ if c_filename is not NULL and cstd.strlen(c_filename) > 0:
+ if cstd.strncmp(c_filename, 'XSLT:', 5) == 0:
filename = ''
else:
filename = funicode(c_filename)
Modified: lxml/trunk/src/lxml/xmlschema.pxi
==============================================================================
--- lxml/trunk/src/lxml/xmlschema.pxi (original)
+++ lxml/trunk/src/lxml/xmlschema.pxi Sat May 6 14:57:46 2006
@@ -31,7 +31,7 @@
# work around for libxml2 bug if document is not XML schema at all
c_node = root_node._c_node
if c_node.ns is NULL or c_node.ns.href is NULL or \
- tree.strcmp(c_node.ns.href, 'http://www.w3.org/2001/XMLSchema') != 0:
+ cstd.strcmp(c_node.ns.href, 'http://www.w3.org/2001/XMLSchema') != 0:
raise XMLSchemaParseError, "Document is not XML Schema"
fake_c_doc = _fakeRootDoc(doc._c_doc, root_node._c_node)
Modified: lxml/trunk/src/lxml/xslt.pxi
==============================================================================
--- lxml/trunk/src/lxml/xslt.pxi (original)
+++ lxml/trunk/src/lxml/xslt.pxi Sat May 6 14:57:46 2006
@@ -68,7 +68,7 @@
# quick check if we are looking for the current stylesheet
c_doc = xslt_resolver_context._c_style_doc
if c_doc is not NULL and c_doc.URL is not NULL:
- if tree.strcmp(c_uri, c_doc.URL) == 0:
+ if cstd.strcmp(c_uri, c_doc.URL) == 0:
return tree.xmlCopyDoc(c_doc, 1)
# call the Python document loaders
From scoder at codespeak.net Sat May 6 18:54:59 2006
From: scoder at codespeak.net (scoder@codespeak.net)
Date: Sat May 6 18:55:00 2006
Subject: [Lxml-checkins] r26878 - in lxml/trunk/src/lxml: . tests
Message-ID: <20060506165459.F233710083@code0.codespeak.net>
Author: scoder
Date: Sat May 6 18:54:58 2006
New Revision: 26878
Modified:
lxml/trunk/src/lxml/parser.pxi
lxml/trunk/src/lxml/tests/test_etree.py
Log:
XMLParser: support chunk_size < 0 for read-at-once, some cleanup in file-like error handling
Modified: lxml/trunk/src/lxml/parser.pxi
==============================================================================
--- lxml/trunk/src/lxml/parser.pxi (original)
+++ lxml/trunk/src/lxml/parser.pxi Sat May 6 18:54:58 2006
@@ -146,9 +146,28 @@
__GLOBAL_PARSER_CONTEXT._initParserDict(c_ctxt)
c_ctxt._private = self._context
+cdef _raiseParseError(xmlParserCtxt* ctxt, char* c_filename):
+ if c_filename is not NULL and \
+ ctxt.lastError.domain == xmlerror.XML_FROM_IO:
+ if ctxt.lastError.message is not NULL:
+ message = "Error reading file %s: %s" % (
+ funicode(c_filename), funicode(ctxt.lastError.message))
+ else:
+ message = "Error reading file %s" % funicode(c_filename)
+ raise IOError, message
+ elif ctxt.lastError.message is not NULL:
+ raise XMLSyntaxError, funicode(ctxt.lastError.message)
+ else:
+ raise XMLSyntaxError
+
cdef xmlDoc* _handleParseResult(xmlParserCtxt* ctxt, xmlDoc* result,
char* c_filename, int recover) except NULL:
cdef _ResolverContext context
+ if ctxt.myDoc is not NULL:
+ if ctxt.myDoc != result:
+ tree.xmlFreeDoc(ctxt.myDoc)
+ ctxt.myDoc = NULL
+
if ctxt.wellFormed or recover:
__GLOBAL_PARSER_CONTEXT._initDocDict(result)
elif result is not NULL:
@@ -165,18 +184,7 @@
context._raise_if_stored()
if result is NULL:
- if c_filename is not NULL and \
- ctxt.lastError.domain == xmlerror.XML_FROM_IO:
- if ctxt.lastError.message is not NULL:
- message = "Error reading file %s: %s" % (
- funicode(c_filename), funicode(ctxt.lastError.message))
- else:
- message = "Error reading file %s" % funicode(c_filename)
- raise IOError, message
- elif ctxt.lastError.message is not NULL:
- raise XMLSyntaxError, funicode(ctxt.lastError.message)
- else:
- raise XMLSyntaxError
+ _raiseParseError(ctxt, c_filename)
return result
############################################################
@@ -191,7 +199,7 @@
xmlparser.XML_PARSE_NOERROR
)
-cdef object __FILE_READ_CHUNK_SIZE
+cdef int __FILE_READ_CHUNK_SIZE
__FILE_READ_CHUNK_SIZE = 32768
cdef class XMLParser(BaseParser):
@@ -213,6 +221,7 @@
* ns_clean - clean up redundant namespace declarations
* recover - try hard to parse through broken XML
* chunk_size - read this many bytes from file-like objects
+ (< 0 means: read everything in one step)
Note that you must not share parsers between threads. This applies also
to the default parser.
@@ -229,6 +238,11 @@
self._memory_parser_ctxt = NULL
self._file_parser_ctxt = NULL
self._push_parser_ctxt = NULL
+
+ self._chunk_size = int(chunk_size)
+ if self._chunk_size == 0:
+ raise ValueError, "Chunk size must not be 0"
+
BaseParser.__init__(self)
parse_options = _XML_DEFAULT_PARSE_OPTIONS
@@ -248,7 +262,6 @@
parse_options = parse_options | xmlparser.XML_PARSE_RECOVER
self._parse_options = parse_options
- self._chunk_size = int(chunk_size)
def __dealloc__(self):
if self._file_parser_ctxt != NULL:
@@ -309,10 +322,15 @@
cdef xmlDoc* _parseDocFromFilelike(self, filelike,
char* c_filename) except NULL:
+ # we read Python string, so we must convert to UTF-8
cdef xmlDoc* result
cdef xmlParserCtxt* pctxt
cdef int recover
cdef int success
+ if self._chunk_size < 0:
+ # read whole file at once
+ data = _utf8(filelike.read())
+ return self._parseDoc(data, c_filename)
self._error_log.connect()
pctxt = self._push_parser_ctxt
if pctxt is NULL:
@@ -338,7 +356,7 @@
data = data.replace('\r\n', '\n')
success = xmlparser.xmlParseChunk(pctxt, _cstr(data), len(data), 0)
if success != 0:
- return _handleParseResult(pctxt, NULL, c_filename, 0)
+ _raiseParseError(pctxt, c_filename)
data = _utf8( read(self._chunk_size) )
xmlparser.xmlParseChunk(pctxt, NULL, 0, 1)
except Exception:
Modified: lxml/trunk/src/lxml/tests/test_etree.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_etree.py (original)
+++ lxml/trunk/src/lxml/tests/test_etree.py Sat May 6 18:54:58 2006
@@ -435,6 +435,27 @@
self.assertEquals(etree.tostring(root).replace('\r', ''),
xml.replace('\r', ''))
+ def test_parse_fileobject_chunk_size(self):
+ etree = self.etree
+ xml = '' + 'test ' * 10 + ' '
+
+ self.assertRaises(ValueError, etree.XMLParser, chunk_size=0)
+
+ parser = etree.XMLParser(chunk_size=-1)
+ f = SillyFileLike(xml)
+ root = etree.parse(f, parser).getroot()
+ self.assertEquals(etree.tostring(root), xml)
+
+ parser = etree.XMLParser(chunk_size=3)
+ f = SillyFileLike(xml)
+ root = etree.parse(f, parser).getroot()
+ self.assertEquals(etree.tostring(root), xml)
+
+ parser = etree.XMLParser(chunk_size=21)
+ f = SillyFileLike(xml)
+ root = etree.parse(f, parser).getroot()
+ self.assertEquals(etree.tostring(root), xml)
+
def _writeElement(self, element, encoding='us-ascii'):
"""Write out element for comparison.
"""
From scoder at codespeak.net Sat May 6 19:02:32 2006
From: scoder at codespeak.net (scoder@codespeak.net)
Date: Sat May 6 19:02:33 2006
Subject: [Lxml-checkins] r26880 - lxml/trunk/src/lxml
Message-ID: <20060506170232.59C8810083@code0.codespeak.net>
Author: scoder
Date: Sat May 6 19:02:31 2006
New Revision: 26880
Modified:
lxml/trunk/src/lxml/parser.pxi
Log:
fix for potential bug in XMLParser._parseDocFromFilelike: could stop reading prematurely on unicode files
Modified: lxml/trunk/src/lxml/parser.pxi
==============================================================================
--- lxml/trunk/src/lxml/parser.pxi (original)
+++ lxml/trunk/src/lxml/parser.pxi Sat May 6 19:02:31 2006
@@ -346,10 +346,7 @@
try:
read = filelike.read
- data = read(self._chunk_size)
- if python.PyUnicode_Check(data):
- data = _stripDeclaration(data)
- data = _utf8(data)
+ data = _utf8( read(self._chunk_size) )
while data:
if _LIBXML_VERSION_INT <= 20622:
# CRLF reading bug in libxml2 <= 2.6.22
From scoder at codespeak.net Sat May 6 20:42:00 2006
From: scoder at codespeak.net (scoder@codespeak.net)
Date: Sat May 6 20:42:02 2006
Subject: [Lxml-checkins] r26892 - lxml/trunk/src/lxml/tests
Message-ID: <20060506184200.C642910083@code0.codespeak.net>
Author: scoder
Date: Sat May 6 20:41:59 2006
New Revision: 26892
Modified:
lxml/trunk/src/lxml/tests/test_etree.py
Log:
updated test case
Modified: lxml/trunk/src/lxml/tests/test_etree.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_etree.py (original)
+++ lxml/trunk/src/lxml/tests/test_etree.py Sat May 6 20:41:59 2006
@@ -437,7 +437,7 @@
def test_parse_fileobject_chunk_size(self):
etree = self.etree
- xml = '' + 'test ' * 10 + ' '
+ xml = '' + 'test ' * 20 + ' '
self.assertRaises(ValueError, etree.XMLParser, chunk_size=0)
@@ -451,7 +451,7 @@
root = etree.parse(f, parser).getroot()
self.assertEquals(etree.tostring(root), xml)
- parser = etree.XMLParser(chunk_size=21)
+ parser = etree.XMLParser(chunk_size=13)
f = SillyFileLike(xml)
root = etree.parse(f, parser).getroot()
self.assertEquals(etree.tostring(root), xml)
From scoder at codespeak.net Sun May 7 21:17:55 2006
From: scoder at codespeak.net (scoder@codespeak.net)
Date: Sun May 7 21:17:56 2006
Subject: [Lxml-checkins] r26950 - lxml/trunk/src/lxml/tests
Message-ID: <20060507191755.1871110076@code0.codespeak.net>
Author: scoder
Date: Sun May 7 21:17:54 2006
New Revision: 26950
Modified:
lxml/trunk/src/lxml/tests/test_io.py
Log:
extended test case
Modified: lxml/trunk/src/lxml/tests/test_io.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_io.py (original)
+++ lxml/trunk/src/lxml/tests/test_io.py Sun May 7 21:17:54 2006
@@ -57,9 +57,12 @@
# and now do it again; previous content should still be there
root2 = tree.parse(filename)
self.assertEquals('a', root.tag)
+ self.assertEquals('a', root2.tag)
# now remove all references to root2, and parse again
del root2
root3 = tree.parse(filename)
+ self.assertEquals('a', root.tag)
+ self.assertEquals('a', root3.tag)
# root2's memory should've been freed here
# XXX how to check?
From scoder at codespeak.net Sun May 7 21:20:25 2006
From: scoder at codespeak.net (scoder@codespeak.net)
Date: Sun May 7 21:20:28 2006
Subject: [Lxml-checkins] r26952 - in lxml/trunk/src/lxml: . tests
Message-ID: <20060507192025.4DC1510080@code0.codespeak.net>
Author: scoder
Date: Sun May 7 21:20:22 2006
New Revision: 26952
Modified:
lxml/trunk/src/lxml/cstd.pxd
lxml/trunk/src/lxml/parser.pxi
lxml/trunk/src/lxml/python.pxd
lxml/trunk/src/lxml/tests/test_etree.py
lxml/trunk/src/lxml/tree.pxd
lxml/trunk/src/lxml/xmlparser.pxd
Log:
another rewrite of file-like parsing: let libxml2 pull the data by using IOInputStream and ReadIO => more generic
Modified: lxml/trunk/src/lxml/cstd.pxd
==============================================================================
--- lxml/trunk/src/lxml/cstd.pxd (original)
+++ lxml/trunk/src/lxml/cstd.pxd Sun May 7 21:20:22 2006
@@ -1,10 +1,14 @@
cdef extern from "stdio.h":
ctypedef struct FILE
+
+cdef extern from "string.h":
+ ctypedef int size_t
cdef int strlen(char* s)
cdef char* strstr(char* haystack, char* needle)
cdef int strcmp(char* s1, char* s2)
- cdef int strncmp(char* s1, char* s2, int len)
+ cdef int strncmp(char* s1, char* s2, size_t len)
+ cdef void* memcpy(void* dest, void* src, size_t len)
cdef extern from "stdarg.h":
ctypedef void *va_list
Modified: lxml/trunk/src/lxml/parser.pxi
==============================================================================
--- lxml/trunk/src/lxml/parser.pxi (original)
+++ lxml/trunk/src/lxml/parser.pxi Sun May 7 21:20:22 2006
@@ -59,6 +59,69 @@
############################################################
+## support for file-like objects
+############################################################
+
+cdef class _FileParserContext:
+ cdef object _filelike
+ cdef object _url
+ cdef object _bytes_utf
+ cdef _ExceptionContext _exc_context
+ cdef cstd.size_t _bytes_read
+ cdef char* _c_url
+ def __init__(self, filelike, exc_context, url=None):
+ self._exc_context = exc_context
+ self._filelike = filelike
+ self._url = url
+ if url is None:
+ self._c_url = NULL
+ else:
+ self._c_url = _cstr(url)
+ self._bytes_utf = ''
+ self._bytes_read = 0
+
+ cdef xmlparser.xmlParserInput* _createParserInput(self, xmlParserCtxt* ctxt):
+ cdef xmlparser.xmlParserInputBuffer* c_buffer
+ c_buffer = xmlparser.xmlAllocParserInputBuffer(0)
+ c_buffer.context = self
+ c_buffer.readcallback = _copyFilelike
+ return xmlparser.xmlNewIOInputStream(ctxt, c_buffer, 0)
+
+ cdef xmlDoc* _readDoc(self, xmlParserCtxt* ctxt, int options):
+ return xmlparser.xmlCtxtReadIO(
+ ctxt, _copyFilelike, NULL, self,
+ self._c_url, NULL, options)
+
+ cdef int write(self, char* c_buffer, int c_size):
+ cdef char* c_start
+ cdef Py_ssize_t byte_count, remaining
+ if self._bytes_read < 0:
+ return 0
+ try:
+ byte_count = python.PyString_GET_SIZE(self._bytes_utf)
+ remaining = byte_count - self._bytes_read
+ if remaining <= 0:
+ self._bytes_utf = _utf8( self._filelike.read(c_size) )
+ self._bytes_read = 0
+ remaining = python.PyString_GET_SIZE(self._bytes_utf)
+ if remaining == 0:
+ self._bytes_read = -1
+ return 0
+ if c_size > remaining:
+ c_size = remaining
+ c_start = _cstr(self._bytes_utf) + self._bytes_read
+ self._bytes_read = self._bytes_read + c_size
+ cstd.memcpy(c_buffer, c_start, c_size)
+ return c_size
+ except Exception:
+ self._exc_context._store_raised()
+ return -1
+
+cdef int _copyFilelike(void* ctxt, char* c_buffer, int c_size):
+ return (<_FileParserContext>ctxt).write(c_buffer, c_size)
+
+
+############################################################
## support for custom document loaders
############################################################
@@ -66,6 +129,7 @@
xmlParserCtxt* c_context):
cdef _ResolverContext context
cdef _InputDocument doc_ref
+ cdef _FileParserContext file_context
cdef xmlparser.xmlParserInput* c_input
if c_context._private is NULL or \
not isinstance(c_context._private, _ResolverContext):
@@ -104,9 +168,8 @@
c_input = xmlparser.xmlNewInputFromFile(
c_context, _cstr(doc_ref._data_utf))
elif doc_ref._type == PARSER_DATA_FILE:
- data = doc_ref._file.read()
- c_input = xmlparser.xmlNewStringInputStream(
- c_context, _cstr(data))
+ file_context = _FileParserContext(doc_ref._file, context)
+ c_input = file_context._createParserInput(c_context)
if data is not None:
context._storage.add(data)
@@ -194,14 +257,9 @@
cdef int _XML_DEFAULT_PARSE_OPTIONS
_XML_DEFAULT_PARSE_OPTIONS = (
xmlparser.XML_PARSE_NOENT |
- xmlparser.XML_PARSE_NOCDATA |
- xmlparser.XML_PARSE_NOWARNING |
- xmlparser.XML_PARSE_NOERROR
+ xmlparser.XML_PARSE_NOCDATA
)
-cdef int __FILE_READ_CHUNK_SIZE
-__FILE_READ_CHUNK_SIZE = 32768
-
cdef class XMLParser(BaseParser):
"""The XML parser. Parsers can be supplied as additional argument to
various parse functions of the lxml API. A default parser is always
@@ -220,28 +278,21 @@
* no_network - prevent network access
* ns_clean - clean up redundant namespace declarations
* recover - try hard to parse through broken XML
- * chunk_size - read this many bytes from file-like objects
- (< 0 means: read everything in one step)
Note that you must not share parsers between threads. This applies also
to the default parser.
"""
cdef int _parse_options
- cdef object _chunk_size
cdef xmlParserCtxt* _file_parser_ctxt
cdef xmlParserCtxt* _memory_parser_ctxt
- cdef xmlParserCtxt* _push_parser_ctxt
+ cdef xmlParserCtxt* _filelike_parser_ctxt
def __init__(self, attribute_defaults=False, dtd_validation=False,
load_dtd=False, no_network=False, ns_clean=False,
- recover=False, chunk_size=__FILE_READ_CHUNK_SIZE):
+ recover=False):
cdef int parse_options
- self._memory_parser_ctxt = NULL
- self._file_parser_ctxt = NULL
- self._push_parser_ctxt = NULL
-
- self._chunk_size = int(chunk_size)
- if self._chunk_size == 0:
- raise ValueError, "Chunk size must not be 0"
+ self._memory_parser_ctxt = NULL
+ self._file_parser_ctxt = NULL
+ self._filelike_parser_ctxt = NULL
BaseParser.__init__(self)
@@ -268,8 +319,8 @@
xmlparser.xmlFreeParserCtxt(self._file_parser_ctxt)
if self._memory_parser_ctxt != NULL:
xmlparser.xmlFreeParserCtxt(self._memory_parser_ctxt)
- if self._push_parser_ctxt != NULL:
- xmlparser.xmlFreeParserCtxt(self._push_parser_ctxt)
+ if self._filelike_parser_ctxt != NULL:
+ xmlparser.xmlFreeParserCtxt(self._filelike_parser_ctxt)
def copy(self):
"Create a new parser with the same configuration."
@@ -323,51 +374,22 @@
cdef xmlDoc* _parseDocFromFilelike(self, filelike,
char* c_filename) except NULL:
# we read Python string, so we must convert to UTF-8
+ cdef _FileParserContext file_context
cdef xmlDoc* result
cdef xmlParserCtxt* pctxt
cdef int recover
- cdef int success
- if self._chunk_size < 0:
- # read whole file at once
- data = _utf8(filelike.read())
- return self._parseDoc(data, c_filename)
self._error_log.connect()
- pctxt = self._push_parser_ctxt
+ pctxt = self._filelike_parser_ctxt
if pctxt is NULL:
pctxt = self._createContext()
- self._push_parser_ctxt = pctxt
+ self._filelike_parser_ctxt = pctxt
self._initContext(pctxt)
- result = NULL
- success = xmlparser.xmlCtxtResetPush(pctxt, NULL, 0, c_filename, NULL)
- if success != 0:
- self._error_log.disconnect()
- raise ParserError, "Failed to setup parser context"
- xmlparser.xmlCtxtUseOptions(pctxt, self._parse_options)
-
- try:
- read = filelike.read
- data = _utf8( read(self._chunk_size) )
- while data:
- if _LIBXML_VERSION_INT <= 20622:
- # CRLF reading bug in libxml2 <= 2.6.22
- data = data.replace('\r\n', '\n')
- success = xmlparser.xmlParseChunk(pctxt, _cstr(data), len(data), 0)
- if success != 0:
- _raiseParseError(pctxt, c_filename)
- data = _utf8( read(self._chunk_size) )
- xmlparser.xmlParseChunk(pctxt, NULL, 0, 1)
- except Exception:
- if pctxt.myDoc is not NULL:
- tree.xmlFreeDoc(pctxt.myDoc)
- pctxt.myDoc = NULL
- self._error_log.disconnect()
- raise
-
+ file_context = _FileParserContext(filelike, self._context)
+ result = file_context._readDoc(pctxt, self._parse_options)
self._error_log.disconnect()
- result = pctxt.myDoc
- pctxt.myDoc = NULL
recover = self._parse_options & xmlparser.XML_PARSE_RECOVER
- return _handleParseResult(pctxt, result, c_filename, recover)
+ result = _handleParseResult(pctxt, result, c_filename, recover)
+ return result
cdef xmlDoc* _internalParseDoc(char* c_text, int options,
_ResolverContext context) except NULL:
@@ -442,10 +464,7 @@
############################################################
cdef int _HTML_DEFAULT_PARSE_OPTIONS
-_HTML_DEFAULT_PARSE_OPTIONS = (
- htmlparser.HTML_PARSE_NOWARNING |
- htmlparser.HTML_PARSE_NOERROR
- )
+_HTML_DEFAULT_PARSE_OPTIONS = 0
cdef class HTMLParser(BaseParser):
"""The HTML parser. This parser allows reading HTML into a normal XML
Modified: lxml/trunk/src/lxml/python.pxd
==============================================================================
--- lxml/trunk/src/lxml/python.pxd (original)
+++ lxml/trunk/src/lxml/python.pxd Sun May 7 21:20:22 2006
@@ -19,6 +19,7 @@
cdef object PyString_FromStringAndSize(char* s, Py_ssize_t size)
cdef object PyString_FromString(char* s)
cdef object PyString_FromFormat(char* format, ...)
+ cdef Py_ssize_t PyString_GET_SIZE(object s)
cdef object PyBool_FromLong(long value)
cdef Py_ssize_t PyList_GET_SIZE(object l)
Modified: lxml/trunk/src/lxml/tests/test_etree.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_etree.py (original)
+++ lxml/trunk/src/lxml/tests/test_etree.py Sun May 7 21:20:22 2006
@@ -425,37 +425,6 @@
self.assertEquals(docinfo.root_name, 'html')
self.assertEquals(docinfo.doctype, '')
- def test_parse_fileobject_crlf(self):
- # libxml2 < 2.6.23 has a bug reading CRLF files in chunks
- etree = self.etree
- parser = etree.XMLParser(chunk_size=3)
- xml = '' + '\r\ntest\r\n \r\n' * 10 + ' '
- f = SillyFileLike(xml)
- root = etree.parse(f, parser).getroot()
- self.assertEquals(etree.tostring(root).replace('\r', ''),
- xml.replace('\r', ''))
-
- def test_parse_fileobject_chunk_size(self):
- etree = self.etree
- xml = '' + 'test ' * 20 + ' '
-
- self.assertRaises(ValueError, etree.XMLParser, chunk_size=0)
-
- parser = etree.XMLParser(chunk_size=-1)
- f = SillyFileLike(xml)
- root = etree.parse(f, parser).getroot()
- self.assertEquals(etree.tostring(root), xml)
-
- parser = etree.XMLParser(chunk_size=3)
- f = SillyFileLike(xml)
- root = etree.parse(f, parser).getroot()
- self.assertEquals(etree.tostring(root), xml)
-
- parser = etree.XMLParser(chunk_size=13)
- f = SillyFileLike(xml)
- root = etree.parse(f, parser).getroot()
- self.assertEquals(etree.tostring(root), xml)
-
def _writeElement(self, element, encoding='us-ascii'):
"""Write out element for comparison.
"""
Modified: lxml/trunk/src/lxml/tree.pxd
==============================================================================
--- lxml/trunk/src/lxml/tree.pxd (original)
+++ lxml/trunk/src/lxml/tree.pxd Sun May 7 21:20:22 2006
@@ -172,7 +172,6 @@
cdef char* xmlBufferContent(xmlBuffer* buf)
cdef extern from "libxml/xmlIO.h":
-
cdef xmlOutputBuffer* xmlAllocOutputBuffer(xmlCharEncodingHandler* encoder)
cdef xmlOutputBuffer* xmlOutputBufferCreateFile(
FILE* file,
@@ -181,6 +180,9 @@
cdef int xmlOutputBufferFlush(xmlOutputBuffer* out)
cdef int xmlOutputBufferClose(xmlOutputBuffer* out)
+ ctypedef int (*xmlInputReadCallback)(void* context, char* buffer, int len)
+ ctypedef int (*xmlInputCloseCallback)(void * context)
+
cdef extern from "libxml/xmlsave.h":
ctypedef struct xmlSaveCtxt:
pass
Modified: lxml/trunk/src/lxml/xmlparser.pxd
==============================================================================
--- lxml/trunk/src/lxml/xmlparser.pxd (original)
+++ lxml/trunk/src/lxml/xmlparser.pxd Sun May 7 21:20:22 2006
@@ -1,8 +1,16 @@
from tree cimport xmlDoc, xmlDict
+from tree cimport xmlInputReadCallback, xmlInputCloseCallback
from xmlerror cimport xmlError
cdef extern from "libxml/tree.h":
ctypedef struct xmlParserInput
+ ctypedef struct xmlParserInputBuffer:
+ void* context
+ xmlInputReadCallback readcallback
+ xmlInputCloseCallback closecallback
+
+cdef extern from "libxml/xmlIO.h":
+ cdef xmlParserInputBuffer* xmlAllocParserInputBuffer(int enc)
cdef extern from "libxml/parser.h":
@@ -37,15 +45,15 @@
XML_PARSE_NOXINCNODE = 32768 # do not generate XINCLUDE START/END nodes
# libxml2 2.6.21+ only:
#XML_PARSE_COMPACT = 65536 # compact small text nodes
-
+
cdef void xmlInitParser()
cdef int xmlLineNumbersDefault(int onoff)
cdef xmlParserCtxt* xmlNewParserCtxt()
+ cdef xmlParserInput* xmlNewIOInputStream(xmlParserCtxt* ctxt,
+ xmlParserInputBuffer* input,
+ int enc)
cdef int xmlCtxtUseOptions(xmlParserCtxt* ctxt, int options)
cdef void xmlFreeParserCtxt(xmlParserCtxt* ctxt)
- cdef int xmlCtxtResetPush(xmlParserCtxt* ctxt,
- char* chunk, int size,
- char* filename, char* encoding)
cdef int xmlParseChunk(xmlParserCtxt* ctxt,
char* chunk, int size, int terminate)
cdef xmlDoc* xmlCtxtReadDoc(xmlParserCtxt* ctxt,
@@ -53,6 +61,11 @@
int options)
cdef xmlDoc* xmlCtxtReadFile(xmlParserCtxt* ctxt,
char* filename, char* encoding, int options)
+ cdef xmlDoc* xmlCtxtReadIO(xmlParserCtxt* ctxt,
+ xmlInputReadCallback ioread,
+ xmlInputCloseCallback ioclose,
+ void* ioctx,
+ char* URL, char* encoding, int options)
# entity loaders:
From scoder at codespeak.net Sun May 7 22:31:27 2006
From: scoder at codespeak.net (scoder@codespeak.net)
Date: Sun May 7 22:31:29 2006
Subject: [Lxml-checkins] r26957 - in lxml/trunk/src/lxml: . tests
Message-ID: <20060507203127.88C0A10083@code0.codespeak.net>
Author: scoder
Date: Sun May 7 22:31:25 2006
New Revision: 26957
Modified:
lxml/trunk/src/lxml/etree.pyx
lxml/trunk/src/lxml/htmlparser.pxd
lxml/trunk/src/lxml/parser.pxi
lxml/trunk/src/lxml/tests/test_htmlparser.py
lxml/trunk/src/lxml/xslt.pxi
Log:
major restructuring and cleanup in parser.pxi
* merge parse functions of XMLParser and HTMLParser back into base class
* use same method for file-like parsing in both
=> reduced code duplication, more readable, less error prone, simpler to test
Modified: lxml/trunk/src/lxml/etree.pyx
==============================================================================
--- lxml/trunk/src/lxml/etree.pyx (original)
+++ lxml/trunk/src/lxml/etree.pyx Sun May 7 22:31:25 2006
@@ -129,7 +129,7 @@
raise type, value, traceback
-cdef class BaseParser # forward declaration
+cdef class _BaseParser # forward declaration
cdef class _Document:
"""Internal base class to reference a libxml document.
@@ -139,7 +139,7 @@
"""
cdef int _ns_counter
cdef xmlDoc* _c_doc
- cdef BaseParser _parser
+ cdef _BaseParser _parser
def __dealloc__(self):
# if there are no more references to the document, it is safe
Modified: lxml/trunk/src/lxml/htmlparser.pxd
==============================================================================
--- lxml/trunk/src/lxml/htmlparser.pxd (original)
+++ lxml/trunk/src/lxml/htmlparser.pxd Sun May 7 22:31:25 2006
@@ -1,4 +1,5 @@
from tree cimport xmlDoc, xmlDict
+from tree cimport xmlInputReadCallback, xmlInputCloseCallback
from xmlparser cimport xmlParserCtxt
from xmlerror cimport xmlError
@@ -24,3 +25,8 @@
cdef xmlDoc* htmlCtxtReadDoc(xmlParserCtxt* ctxt,
char* buffer, char* URL, char* encoding,
int options)
+ cdef xmlDoc* htmlCtxtReadIO(xmlParserCtxt* ctxt,
+ xmlInputReadCallback ioread,
+ xmlInputCloseCallback ioclose,
+ void* ioctx,
+ char* URL, char* encoding, int options)
Modified: lxml/trunk/src/lxml/parser.pxi
==============================================================================
--- lxml/trunk/src/lxml/parser.pxi (original)
+++ lxml/trunk/src/lxml/parser.pxi Sun May 7 22:31:25 2006
@@ -10,6 +10,10 @@
class ParserError(LxmlError):
pass
+ctypedef enum LxmlParserType:
+ LXML_XML_PARSER
+ LXML_HTML_PARSER
+
cdef class _ParserContext:
"""Global parser context to share the string dictionary.
"""
@@ -87,10 +91,16 @@
c_buffer.readcallback = _copyFilelike
return xmlparser.xmlNewIOInputStream(ctxt, c_buffer, 0)
- cdef xmlDoc* _readDoc(self, xmlParserCtxt* ctxt, int options):
- return xmlparser.xmlCtxtReadIO(
- ctxt, _copyFilelike, NULL, self,
- self._c_url, NULL, options)
+ cdef xmlDoc* _readDoc(self, xmlParserCtxt* ctxt, int options,
+ LxmlParserType parser_type):
+ if parser_type == LXML_XML_PARSER:
+ return xmlparser.xmlCtxtReadIO(
+ ctxt, _copyFilelike, NULL, self,
+ self._c_url, NULL, options)
+ else:
+ return htmlparser.htmlCtxtReadIO(
+ ctxt, _copyFilelike, NULL, self,
+ self._c_url, NULL, options)
cdef int write(self, char* c_buffer, int c_size):
cdef char* c_start
@@ -184,30 +194,107 @@
## Parsers
############################################################
-cdef class BaseParser:
+cdef class _BaseParser:
+ cdef int _parse_options
cdef _ErrorLog _error_log
cdef readonly object resolvers
cdef _ResolverContext _context
+ cdef LxmlParserType _parser_type
+ cdef xmlParserCtxt* _parser_ctxt
+
def __init__(self):
- cdef _ResolverContext context
+ cdef xmlParserCtxt* pctxt
+ if isinstance(self, HTMLParser):
+ self._parser_type = LXML_HTML_PARSER
+ pctxt = htmlparser.htmlCreateMemoryParserCtxt('dummy', 5)
+ elif isinstance(self, XMLParser):
+ self._parser_type = LXML_XML_PARSER
+ pctxt = xmlparser.xmlNewParserCtxt()
+ else:
+ raise TypeError, "This class cannot be instantiated"
+ self._parser_ctxt = pctxt
+ if pctxt is NULL:
+ raise ParserError, "Failed to create parser context"
self._error_log = _ErrorLog()
- self.resolvers = _ResolverRegistry()
- self._context = _ResolverContext(self.resolvers)
+ self.resolvers = _ResolverRegistry()
+ self._context = _ResolverContext(self.resolvers)
+ pctxt._private = self._context
+
+ def __dealloc__(self):
+ if self._parser_ctxt != NULL:
+ xmlparser.xmlFreeParserCtxt(self._parser_ctxt)
property error_log:
def __get__(self):
return self._error_log.copy()
- cdef _copy(self):
- cdef BaseParser parser
+ def copy(self):
+ "Create a new parser with the same configuration."
+ cdef _BaseParser parser
parser = self.__class__()
+ parser._parse_options = self._parse_options
parser.resolvers = self.resolvers.copy()
parser._context = _ResolverContext(parser.resolvers)
return parser
- cdef _initContext(self, xmlParserCtxt* c_ctxt):
- __GLOBAL_PARSER_CONTEXT._initParserDict(c_ctxt)
- c_ctxt._private = self._context
+ cdef xmlDoc* _parseDoc(self, char* c_text, char* c_filename) except NULL:
+ """Parse document, share dictionary if possible.
+ """
+ cdef xmlDoc* result
+ cdef xmlParserCtxt* pctxt
+ cdef int recover
+ self._error_log.connect()
+ pctxt = self._parser_ctxt
+ __GLOBAL_PARSER_CONTEXT._initParserDict(pctxt)
+
+ if self._parser_type == LXML_HTML_PARSER:
+ result = htmlparser.htmlCtxtReadDoc(
+ pctxt, c_text, c_filename, NULL, self._parse_options)
+ else:
+ result = xmlparser.xmlCtxtReadDoc(
+ pctxt, c_text, c_filename, NULL, self._parse_options)
+
+ self._error_log.disconnect()
+ recover = self._parse_options & xmlparser.XML_PARSE_RECOVER
+ return _handleParseResult(pctxt, result, NULL, recover)
+
+ cdef xmlDoc* _parseDocFromFile(self, char* c_filename) except NULL:
+ cdef xmlDoc* result
+ cdef xmlParserCtxt* pctxt
+ cdef int recover
+ self._error_log.connect()
+ pctxt = self._parser_ctxt
+ __GLOBAL_PARSER_CONTEXT._initParserDict(pctxt)
+
+ if self._parser_type == LXML_HTML_PARSER:
+ result = htmlparser.htmlCtxtReadFile(
+ pctxt, c_filename, NULL, self._parse_options)
+ else:
+ result = xmlparser.xmlCtxtReadFile(
+ pctxt, c_filename, NULL, self._parse_options)
+
+ self._error_log.disconnect()
+ recover = self._parse_options & xmlparser.XML_PARSE_RECOVER
+ return _handleParseResult(pctxt, result, c_filename, recover)
+
+ cdef xmlDoc* _parseDocFromFilelike(self, filelike,
+ char* c_filename) except NULL:
+ # we read Python string, so we must convert to UTF-8
+ cdef _FileParserContext file_context
+ cdef xmlDoc* result
+ cdef xmlParserCtxt* pctxt
+ cdef int recover
+ self._error_log.connect()
+ pctxt = self._parser_ctxt
+ __GLOBAL_PARSER_CONTEXT._initParserDict(pctxt)
+
+ file_context = _FileParserContext(filelike, self._context)
+ result = file_context._readDoc(
+ pctxt, self._parse_options, self._parser_type)
+
+ self._error_log.disconnect()
+ recover = self._parse_options & xmlparser.XML_PARSE_RECOVER
+ return _handleParseResult(pctxt, result, c_filename, recover)
cdef _raiseParseError(xmlParserCtxt* ctxt, char* c_filename):
if c_filename is not NULL and \
@@ -260,7 +347,7 @@
xmlparser.XML_PARSE_NOCDATA
)
-cdef class XMLParser(BaseParser):
+cdef class XMLParser(_BaseParser):
"""The XML parser. Parsers can be supplied as additional argument to
various parse functions of the lxml API. A default parser is always
available and can be replaced by a call to the global function
@@ -282,19 +369,11 @@
Note that you must not share parsers between threads. This applies also
to the default parser.
"""
- cdef int _parse_options
- cdef xmlParserCtxt* _file_parser_ctxt
- cdef xmlParserCtxt* _memory_parser_ctxt
- cdef xmlParserCtxt* _filelike_parser_ctxt
def __init__(self, attribute_defaults=False, dtd_validation=False,
load_dtd=False, no_network=False, ns_clean=False,
recover=False):
cdef int parse_options
- self._memory_parser_ctxt = NULL
- self._file_parser_ctxt = NULL
- self._filelike_parser_ctxt = NULL
-
- BaseParser.__init__(self)
+ _BaseParser.__init__(self)
parse_options = _XML_DEFAULT_PARSE_OPTIONS
if load_dtd:
@@ -314,83 +393,6 @@
self._parse_options = parse_options
- def __dealloc__(self):
- if self._file_parser_ctxt != NULL:
- xmlparser.xmlFreeParserCtxt(self._file_parser_ctxt)
- if self._memory_parser_ctxt != NULL:
- xmlparser.xmlFreeParserCtxt(self._memory_parser_ctxt)
- if self._filelike_parser_ctxt != NULL:
- xmlparser.xmlFreeParserCtxt(self._filelike_parser_ctxt)
-
- def copy(self):
- "Create a new parser with the same configuration."
- cdef XMLParser parser
- parser = self._copy()
- parser._parse_options = self._parse_options
- return parser
-
- cdef xmlParserCtxt* _createContext(self) except NULL:
- cdef xmlParserCtxt* pctxt
- pctxt = xmlparser.xmlNewParserCtxt()
- if pctxt is NULL:
- self._error_log.disconnect()
- raise ParserError, "Failed to create parser context"
- return pctxt
-
- cdef xmlDoc* _parseDoc(self, char* c_text, char* c_filename) except NULL:
- """Parse document, share dictionary if possible.
- """
- cdef xmlDoc* result
- cdef xmlParserCtxt* pctxt
- cdef int recover
- self._error_log.connect()
- pctxt = self._memory_parser_ctxt
- if pctxt is NULL:
- pctxt = self._createContext()
- self._memory_parser_ctxt = pctxt
- self._initContext(pctxt)
- result = xmlparser.xmlCtxtReadDoc(
- pctxt, c_text, c_filename, NULL, self._parse_options)
- self._error_log.disconnect()
- recover = self._parse_options & xmlparser.XML_PARSE_RECOVER
- return _handleParseResult(pctxt, result, NULL, recover)
-
- cdef xmlDoc* _parseDocFromFile(self, char* c_filename) except NULL:
- cdef xmlDoc* result
- cdef xmlParserCtxt* pctxt
- cdef int recover
- self._error_log.connect()
- pctxt = self._file_parser_ctxt
- if pctxt is NULL:
- pctxt = self._createContext()
- self._file_parser_ctxt = pctxt
- self._initContext(pctxt)
- result = xmlparser.xmlCtxtReadFile(
- pctxt, c_filename, NULL, self._parse_options)
- self._error_log.disconnect()
- recover = self._parse_options & xmlparser.XML_PARSE_RECOVER
- return _handleParseResult(pctxt, result, c_filename, recover)
-
- cdef xmlDoc* _parseDocFromFilelike(self, filelike,
- char* c_filename) except NULL:
- # we read Python string, so we must convert to UTF-8
- cdef _FileParserContext file_context
- cdef xmlDoc* result
- cdef xmlParserCtxt* pctxt
- cdef int recover
- self._error_log.connect()
- pctxt = self._filelike_parser_ctxt
- if pctxt is NULL:
- pctxt = self._createContext()
- self._filelike_parser_ctxt = pctxt
- self._initContext(pctxt)
- file_context = _FileParserContext(filelike, self._context)
- result = file_context._readDoc(pctxt, self._parse_options)
- self._error_log.disconnect()
- recover = self._parse_options & xmlparser.XML_PARSE_RECOVER
- result = _handleParseResult(pctxt, result, c_filename, recover)
- return result
-
cdef xmlDoc* _internalParseDoc(char* c_text, int options,
_ResolverContext context) except NULL:
# internal parser function for XSLT
@@ -435,7 +437,7 @@
cdef XMLParser __DEFAULT_XML_PARSER
__DEFAULT_XML_PARSER = XMLParser()
-cdef BaseParser __DEFAULT_PARSER
+cdef _BaseParser __DEFAULT_PARSER
__DEFAULT_PARSER = __DEFAULT_XML_PARSER
def set_default_parser(parser=None):
@@ -451,7 +453,7 @@
global __DEFAULT_PARSER
if parser is None:
__DEFAULT_PARSER = __DEFAULT_XML_PARSER
- elif isinstance(parser, (HTMLParser, XMLParser)):
+ elif isinstance(parser, _BaseParser):
__DEFAULT_PARSER = parser
else:
raise TypeError, "Invalid parser"
@@ -466,7 +468,7 @@
cdef int _HTML_DEFAULT_PARSE_OPTIONS
_HTML_DEFAULT_PARSE_OPTIONS = 0
-cdef class HTMLParser(BaseParser):
+cdef class HTMLParser(_BaseParser):
"""The HTML parser. This parser allows reading HTML into a normal XML
tree. By default, it can read broken (non well-formed) HTML, depending on
the capabilities of libxml2. Use the 'recover' option to switch this off.
@@ -478,14 +480,9 @@
Note that you must not share parsers between threads.
"""
- cdef int _parse_options
- cdef xmlParserCtxt* _memory_parser_ctxt
- cdef xmlParserCtxt* _file_parser_ctxt
def __init__(self, recover=True, no_network=False, remove_blank_text=False):
cdef int parse_options
- self._memory_parser_ctxt = NULL
- self._file_parser_ctxt = NULL
- BaseParser.__init__(self)
+ _BaseParser.__init__(self)
parse_options = _HTML_DEFAULT_PARSE_OPTIONS
if recover:
@@ -499,62 +496,6 @@
self._parse_options = parse_options
- def __dealloc__(self):
- if self._file_parser_ctxt != NULL:
- htmlparser.htmlFreeParserCtxt(self._file_parser_ctxt)
- if self._memory_parser_ctxt != NULL:
- htmlparser.htmlFreeParserCtxt(self._memory_parser_ctxt)
-
- def copy(self):
- "Create a new parser with the same configuration."
- cdef HTMLParser parser
- parser = self._copy()
- parser._parse_options = self._parse_options
- return parser
-
- cdef xmlDoc* _parseDoc(self, char* c_text, char* c_filename) except NULL:
- """Parse HTML document, share dictionary if possible.
- """
- cdef xmlDoc* result
- cdef xmlParserCtxt* pctxt
- cdef int recover
- self._error_log.connect()
- pctxt = self._memory_parser_ctxt
- if pctxt is NULL:
- pctxt = htmlparser.htmlCreateMemoryParserCtxt('dummy', 5)
- if pctxt is NULL:
- self._error_log.disconnect()
- raise ParserError, "Failed to create parser context"
- self._memory_parser_ctxt = pctxt
- self._initContext(pctxt)
- result = htmlparser.htmlCtxtReadDoc(
- pctxt, c_text, c_filename, NULL, self._parse_options)
- self._error_log.disconnect()
- recover = self._parse_options & xmlparser.XML_PARSE_RECOVER
- return _handleParseResult(pctxt, result, NULL, recover)
-
- cdef xmlDoc* _parseDocFromFile(self, char* c_filename) except NULL:
- cdef xmlDoc* result
- cdef xmlParserCtxt* pctxt
- cdef int recover
- self._error_log.connect()
- pctxt = self._file_parser_ctxt
- if pctxt is NULL:
- pctxt = htmlparser.htmlCreateFileParserCtxt(c_filename, NULL)
- if pctxt is NULL:
- self._error_log.disconnect()
- warnings = self._error_log.filter_from_warnings()
- if warnings and warnings[-1].domain == xmlerror.XML_FROM_IO:
- raise IOError, "Could not open file %s" % c_filename
- raise ParserError, "Failed to create parser context"
- self._file_parser_ctxt = pctxt
- self._initContext(pctxt)
- result = htmlparser.htmlCtxtReadFile(
- pctxt, c_filename, NULL, self._parse_options)
- self._error_log.disconnect()
- recover = self._parse_options & xmlparser.XML_PARSE_RECOVER
- return _handleParseResult(pctxt, result, c_filename, recover)
-
cdef HTMLParser __DEFAULT_HTML_PARSER
__DEFAULT_HTML_PARSER = HTMLParser()
@@ -566,45 +507,35 @@
cdef char* c_filename
if parser is None:
parser = __DEFAULT_PARSER
+ elif not isinstance(parser, _BaseParser):
+ raise TypeError, "invalid parser"
__GLOBAL_PARSER_CONTEXT._initParser()
if not filename:
c_filename = NULL
else:
c_filename = _cstr(filename)
- if isinstance(parser, XMLParser):
- return (parser)._parseDoc(_cstr(text_utf), c_filename)
- elif isinstance(parser, HTMLParser):
- return (parser)._parseDoc(_cstr(text_utf), c_filename)
- else:
- raise TypeError, "invalid parser"
+ return (<_BaseParser>parser)._parseDoc(_cstr(text_utf), c_filename)
cdef xmlDoc* _parseDocFromFile(filename, parser) except NULL:
if parser is None:
parser = __DEFAULT_PARSER
- __GLOBAL_PARSER_CONTEXT._initParser()
- if isinstance(parser, XMLParser):
- return (parser)._parseDocFromFile(_cstr(filename))
- elif isinstance(parser, HTMLParser):
- return (parser)._parseDocFromFile(_cstr(filename))
- else:
+ elif not isinstance(parser, _BaseParser):
raise TypeError, "invalid parser"
+ __GLOBAL_PARSER_CONTEXT._initParser()
+ return (<_BaseParser>parser)._parseDocFromFile(_cstr(filename))
cdef xmlDoc* _parseDocFromFilelike(source, filename, parser) except NULL:
cdef char* c_filename
if parser is None:
parser = __DEFAULT_PARSER
+ elif not isinstance(parser, _BaseParser):
+ raise TypeError, "invalid parser"
__GLOBAL_PARSER_CONTEXT._initParser()
if not filename:
c_filename = NULL
else:
c_filename = _cstr(filename)
- if isinstance(parser, XMLParser):
- return (parser)._parseDocFromFilelike(source, c_filename)
- elif isinstance(parser, HTMLParser):
- data_utf = _utf8(source.read())
- return (parser)._parseDoc(_cstr(data_utf), c_filename)
- else:
- raise TypeError, "invalid parser"
+ return (<_BaseParser>parser)._parseDocFromFilelike(source, c_filename)
cdef xmlDoc* _newDoc():
cdef xmlDoc* result
Modified: lxml/trunk/src/lxml/tests/test_htmlparser.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_htmlparser.py (original)
+++ lxml/trunk/src/lxml/tests/test_htmlparser.py Sun May 7 22:31:25 2006
@@ -60,6 +60,13 @@
def test_module_parse_html_filelike(self):
parser = self.etree.HTMLParser()
+ f = SillyFileLike(self.html_str)
+ tree = self.etree.parse(f, parser)
+ html = self.etree.tostring(tree.getroot())
+ self.assertEqual(unentitify(html), self.html_str)
+
+ def test_module_parse_html_filelike_unicode(self):
+ parser = self.etree.HTMLParser()
f = SillyFileLike(self.uhtml_str)
tree = self.etree.parse(f, parser)
html = self.etree.tostring(tree.getroot())
Modified: lxml/trunk/src/lxml/xslt.pxi
==============================================================================
--- lxml/trunk/src/lxml/xslt.pxi (original)
+++ lxml/trunk/src/lxml/xslt.pxi Sun May 7 22:31:25 2006
@@ -30,8 +30,8 @@
cdef class _XSLTResolverContext(_ResolverContext):
cdef xmlDoc* _c_style_doc
- cdef BaseParser _parser
- def __init__(self, BaseParser parser not None):
+ cdef _BaseParser _parser
+ def __init__(self, _BaseParser parser not None):
_ResolverContext.__init__(self, parser.resolvers)
self._parser = parser
self._c_style_doc = NULL
From scoder at codespeak.net Mon May 8 06:44:28 2006
From: scoder at codespeak.net (scoder@codespeak.net)
Date: Mon May 8 06:44:31 2006
Subject: [Lxml-checkins] r26959 - lxml/trunk/src/lxml
Message-ID: <20060508044428.5B4CB10076@code0.codespeak.net>
Author: scoder
Date: Mon May 8 06:44:26 2006
New Revision: 26959
Modified:
lxml/trunk/src/lxml/parser.pxi
Log:
cleanup
Modified: lxml/trunk/src/lxml/parser.pxi
==============================================================================
--- lxml/trunk/src/lxml/parser.pxi (original)
+++ lxml/trunk/src/lxml/parser.pxi Mon May 8 06:44:26 2006
@@ -318,12 +318,13 @@
tree.xmlFreeDoc(ctxt.myDoc)
ctxt.myDoc = NULL
- if ctxt.wellFormed or recover:
- __GLOBAL_PARSER_CONTEXT._initDocDict(result)
- elif result is not NULL:
- # free broken document
- tree.xmlFreeDoc(result)
- result = NULL
+ if result is not NULL:
+ if ctxt.wellFormed or recover:
+ __GLOBAL_PARSER_CONTEXT._initDocDict(result)
+ else:
+ # free broken document
+ tree.xmlFreeDoc(result)
+ result = NULL
if ctxt._private is not NULL:
context = <_ResolverContext>ctxt._private
From scoder at codespeak.net Mon May 8 15:53:29 2006
From: scoder at codespeak.net (scoder@codespeak.net)
Date: Mon May 8 15:53:30 2006
Subject: [Lxml-checkins] r26965 - lxml/trunk
Message-ID: <20060508135329.508E510070@code0.codespeak.net>
Author: scoder
Date: Mon May 8 15:53:28 2006
New Revision: 26965
Modified:
lxml/trunk/bench.py
Log:
IO benchmarks
Modified: lxml/trunk/bench.py
==============================================================================
--- lxml/trunk/bench.py (original)
+++ lxml/trunk/bench.py Mon May 8 15:53:28 2006
@@ -1,5 +1,6 @@
import sys, string, time, copy, gc
from itertools import *
+from StringIO import StringIO
_TEXT = "some ASCII text"
_UTEXT = u"some klingon: \F8D2"
@@ -248,6 +249,26 @@
for child in reversed(root):
pass
+ @with_text(text=True, utext=True)
+ def bench_tostring_utf8(self, root):
+ self.etree.tostring(root, 'UTF-8')
+
+ @with_text(text=True, utext=True)
+ def bench_tostring_utf16(self, root):
+ self.etree.tostring(root, 'UTF-16')
+
+ @with_text(text=True, utext=True)
+ def bench_tostring_utf8_unicode_XML(self, root):
+ xml = unicode(self.etree.tostring(root, 'UTF-8'), 'UTF-8')
+ self.etree.XML(xml)
+
+ @with_text(text=True, utext=True)
+ def bench_write_utf8_parse_stringIO(self, root):
+ f = StringIO()
+ self.etree.ElementTree(root).write(f, 'UTF-8')
+ f.seek(0)
+ self.etree.parse(f)
+
def bench_append_from_document(self, root1, root2):
# == "1,2 2,3 1,3 3,1 3,2 2,1" # trees 1 and 2, or 2 and 3, or ...
for el in root2:
@@ -588,7 +609,7 @@
for lib, (bench, benchmark_setup) in enumerate(izip(benchmark_suites, bench_calls)):
bench_name, method_call = benchmark_setup[:2]
tree_set_name = build_treeset_name(*benchmark_setup[-3:])
- print "%-3s: %-23s" % (bench.lib_name, bench_name[6:29]),
+ print "%-3s: %-28s" % (bench.lib_name, bench_name[6:34]),
if method_call is None:
print "skipped"
continue
From scoder at codespeak.net Mon May 8 16:45:50 2006
From: scoder at codespeak.net (scoder@codespeak.net)
Date: Mon May 8 16:45:51 2006
Subject: [Lxml-checkins] r26966 - in lxml/trunk: . src/lxml src/lxml/tests
Message-ID: <20060508144550.42B6410070@code0.codespeak.net>
Author: scoder
Date: Mon May 8 16:45:47 2006
New Revision: 26966
Modified:
lxml/trunk/CHANGES.txt
lxml/trunk/src/lxml/htmlparser.pxd
lxml/trunk/src/lxml/parser.pxi
lxml/trunk/src/lxml/python.pxd
lxml/trunk/src/lxml/tests/test_htmlparser.py
lxml/trunk/src/lxml/tree.pxd
lxml/trunk/src/lxml/xmlparser.pxd
Log:
support parsing straight from Python unicode strings
Modified: lxml/trunk/CHANGES.txt
==============================================================================
--- lxml/trunk/CHANGES.txt (original)
+++ lxml/trunk/CHANGES.txt Mon May 8 16:45:47 2006
@@ -7,9 +7,14 @@
Features added
--------------
+* Parsing a unicode string no longer copies the string (reduced memory
+ footprint)
+
* Parsing file-like objects now reads chunks rather than the whole file
+ (reduced memory footprint)
-* Parsing StringIO objects from the start avoids copying the string
+* Parsing StringIO objects from the start avoids copying the string (reduced
+ memory footprint)
* Read-only 'docinfo' attribute in ElementTree class holds DOCTYPE
information, original encoding and XML version as seen by the parser
Modified: lxml/trunk/src/lxml/htmlparser.pxd
==============================================================================
--- lxml/trunk/src/lxml/htmlparser.pxd (original)
+++ lxml/trunk/src/lxml/htmlparser.pxd Mon May 8 16:45:47 2006
@@ -30,3 +30,6 @@
xmlInputCloseCallback ioclose,
void* ioctx,
char* URL, char* encoding, int options)
+ cdef xmlDoc* htmlCtxtReadMemory(xmlParserCtxt* ctxt,
+ char* buffer, int size,
+ char* filename, char* encoding, int options)
Modified: lxml/trunk/src/lxml/parser.pxi
==============================================================================
--- lxml/trunk/src/lxml/parser.pxi (original)
+++ lxml/trunk/src/lxml/parser.pxi Mon May 8 16:45:47 2006
@@ -61,6 +61,46 @@
cdef _ParserContext __GLOBAL_PARSER_CONTEXT
__GLOBAL_PARSER_CONTEXT = _ParserContext()
+############################################################
+## support for reading Python unicode
+############################################################
+
+# can libxml2 read plain Python unicode data?
+cdef char* _UNICODE_ENCODING
+_UNICODE_ENCODING = NULL
+
+cdef void _setupUnicodeParser():
+ """Sets _UNICODE_ENCODING to the libxml2 name of Python's native unicode
+ encoding if libxml2 supports reading it. This depends on iconv, so we
+ simply check if we find a matching encoding handler.
+ """
+ cdef Py_ssize_t l
+ cdef char* buffer
+ cdef char* enc
+ utext = unicode(" ")
+ l = python.PyUnicode_GET_DATA_SIZE(utext)
+ buffer = python.PyUnicode_AS_DATA(utext)
+ enc = _findEncodingName(buffer, l)
+ if tree.xmlFindCharEncodingHandler(enc) is not NULL:
+ global _UNICODE_ENCODING
+ _UNICODE_ENCODING = enc
+
+cdef char* _findEncodingName(char* buffer, int size):
+ "Work around bug in libxml2: find iconv name of encoding on our own."
+ cdef int enc
+ enc = tree.xmlDetectCharEncoding(buffer, size)
+ if enc == tree.XML_CHAR_ENCODING_UTF16LE:
+ return "UTF16LE"
+ elif enc == tree.XML_CHAR_ENCODING_UTF16BE:
+ return "UTF16BE"
+ elif enc == tree.XML_CHAR_ENCODING_UCS4LE:
+ return "UCS-4LE"
+ elif enc == tree.XML_CHAR_ENCODING_UCS4BE:
+ return "UCS-4BE"
+ else:
+ return tree.xmlGetCharEncodingName(enc)
+
+_setupUnicodeParser()
############################################################
## support for file-like objects
@@ -237,6 +277,41 @@
parser._context = _ResolverContext(parser.resolvers)
return parser
+ cdef xmlDoc* _parseUnicodeDoc(self, utext, char* c_filename) except NULL:
+ """Parse unicode document, share dictionary if possible.
+ """
+ cdef xmlDoc* result
+ cdef xmlParserCtxt* pctxt
+ cdef int recover
+ cdef Py_ssize_t py_buffer_len
+ cdef int buffer_len
+ cdef char* c_text
+ cdef char* c_encoding
+ cdef int enc
+ py_buffer_len = python.PyUnicode_GET_DATA_SIZE(utext)
+ if py_buffer_len > python.INT_MAX:
+ text_utf = _utf8(utext)
+ return self._parseDoc(text_utf, c_filename)
+ buffer_len = py_buffer_len
+
+ self._error_log.connect()
+ pctxt = self._parser_ctxt
+ __GLOBAL_PARSER_CONTEXT._initParserDict(pctxt)
+
+ c_text = python.PyUnicode_AS_DATA(utext)
+ if self._parser_type == LXML_HTML_PARSER:
+ result = htmlparser.htmlCtxtReadMemory(
+ pctxt, c_text, buffer_len, c_filename, _UNICODE_ENCODING,
+ self._parse_options)
+ else:
+ result = xmlparser.xmlCtxtReadMemory(
+ pctxt, c_text, buffer_len, c_filename, _UNICODE_ENCODING,
+ self._parse_options)
+
+ self._error_log.disconnect()
+ recover = self._parse_options & xmlparser.XML_PARSE_RECOVER
+ return _handleParseResult(pctxt, result, NULL, recover)
+
cdef xmlDoc* _parseDoc(self, char* c_text, char* c_filename) except NULL:
"""Parse document, share dictionary if possible.
"""
@@ -504,7 +579,7 @@
## helper functions for document creation
############################################################
-cdef xmlDoc* _parseDoc(text_utf, filename, parser) except NULL:
+cdef xmlDoc* _parseDoc(text, filename, parser) except NULL:
cdef char* c_filename
if parser is None:
parser = __DEFAULT_PARSER
@@ -515,7 +590,10 @@
c_filename = NULL
else:
c_filename = _cstr(filename)
- return (<_BaseParser>parser)._parseDoc(_cstr(text_utf), c_filename)
+ if python.PyUnicode_Check(text):
+ return (<_BaseParser>parser)._parseUnicodeDoc(text, c_filename)
+ else:
+ return (<_BaseParser>parser)._parseDoc(_cstr(text), c_filename)
cdef xmlDoc* _parseDocFromFile(filename, parser) except NULL:
if parser is None:
@@ -570,12 +648,15 @@
cdef _Document _parseMemoryDocument(text, url, parser):
cdef xmlDoc* c_doc
- text_utf = _utf8(text)
if python.PyUnicode_Check(text):
- text_utf = _stripDeclaration(text_utf)
+ # pass native unicode only if libxml2 can handle it
+ if _UNICODE_ENCODING is NULL:
+ text = _stripDeclaration(_utf8(text))
+ else:
+ text = _utf8(text)
if url is not None:
url = _utf8(url)
- c_doc = _parseDoc(text_utf, url, parser)
+ c_doc = _parseDoc(text, url, parser)
return _documentFactory(c_doc, parser)
cdef _Document _parseFilelikeDocument(source, url, parser):
Modified: lxml/trunk/src/lxml/python.pxd
==============================================================================
--- lxml/trunk/src/lxml/python.pxd (original)
+++ lxml/trunk/src/lxml/python.pxd Mon May 8 16:45:47 2006
@@ -4,6 +4,7 @@
ctypedef struct PyObject
ctypedef int size_t
ctypedef int Py_ssize_t
+ cdef int INT_MAX
cdef FILE* PyFile_AsFile(PyObject* p)
cdef int PyFile_Check(object p)
@@ -16,6 +17,8 @@
char* errors)
cdef object PyUnicode_DecodeUTF8(char* s, Py_ssize_t size, char* errors)
cdef object PyUnicode_AsUTF8String(object ustring)
+ cdef char* PyUnicode_AS_DATA(object ustring)
+ cdef Py_ssize_t PyUnicode_GET_DATA_SIZE(object ustring)
cdef object PyString_FromStringAndSize(char* s, Py_ssize_t size)
cdef object PyString_FromString(char* s)
cdef object PyString_FromFormat(char* format, ...)
Modified: lxml/trunk/src/lxml/tests/test_htmlparser.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_htmlparser.py (original)
+++ lxml/trunk/src/lxml/tests/test_htmlparser.py Mon May 8 16:45:47 2006
@@ -27,6 +27,11 @@
self.assertEqual(self.etree.tostring(element),
self.html_str)
+ def test_module_HTML_unicode(self):
+ element = self.etree.HTML(self.uhtml_str)
+ self.assertEqual(unentitify(self.etree.tostring(element)),
+ self.uhtml_str)
+
def test_module_parse_html_error(self):
parser = self.etree.HTMLParser(recover=False)
parse = self.etree.parse
Modified: lxml/trunk/src/lxml/tree.pxd
==============================================================================
--- lxml/trunk/src/lxml/tree.pxd (original)
+++ lxml/trunk/src/lxml/tree.pxd Mon May 8 16:45:47 2006
@@ -8,8 +8,37 @@
cdef int LIBXML_VERSION
cdef extern from "libxml/encoding.h":
+ ctypedef enum xmlCharEncoding:
+ XML_CHAR_ENCODING_ERROR = -1 # No char encoding detected
+ XML_CHAR_ENCODING_NONE = 0 # No char encoding detected
+ XML_CHAR_ENCODING_UTF8 = 1 # UTF-8
+ XML_CHAR_ENCODING_UTF16LE = 2 # UTF-16 little endian
+ XML_CHAR_ENCODING_UTF16BE = 3 # UTF-16 big endian
+ XML_CHAR_ENCODING_UCS4LE = 4 # UCS-4 little endian
+ XML_CHAR_ENCODING_UCS4BE = 5 # UCS-4 big endian
+ XML_CHAR_ENCODING_EBCDIC = 6 # EBCDIC uh!
+ XML_CHAR_ENCODING_UCS4_2143 = 7 # UCS-4 unusual ordering
+ XML_CHAR_ENCODING_UCS4_3412 = 8 # UCS-4 unusual ordering
+ XML_CHAR_ENCODING_UCS2 = 9 # UCS-2
+ XML_CHAR_ENCODING_8859_1 = 10 # ISO-8859-1 ISO Latin 1
+ XML_CHAR_ENCODING_8859_2 = 11 # ISO-8859-2 ISO Latin 2
+ XML_CHAR_ENCODING_8859_3 = 12 # ISO-8859-3
+ XML_CHAR_ENCODING_8859_4 = 13 # ISO-8859-4
+ XML_CHAR_ENCODING_8859_5 = 14 # ISO-8859-5
+ XML_CHAR_ENCODING_8859_6 = 15 # ISO-8859-6
+ XML_CHAR_ENCODING_8859_7 = 16 # ISO-8859-7
+ XML_CHAR_ENCODING_8859_8 = 17 # ISO-8859-8
+ XML_CHAR_ENCODING_8859_9 = 18 # ISO-8859-9
+ XML_CHAR_ENCODING_2022_JP = 19 # ISO-2022-JP
+ XML_CHAR_ENCODING_SHIFT_JIS = 20 # Shift_JIS
+ XML_CHAR_ENCODING_EUC_JP = 21 # EUC-JP
+ XML_CHAR_ENCODING_ASCII = 22 # pure ASCII
+
ctypedef struct xmlCharEncodingHandler
cdef xmlCharEncodingHandler* xmlFindCharEncodingHandler(char* name)
+ cdef xmlCharEncodingHandler* xmlGetCharEncodingHandler(int enc)
+ cdef int xmlDetectCharEncoding(char* text, int len)
+ cdef char* xmlGetCharEncodingName(int enc)
cdef extern from "libxml/hash.h":
ctypedef struct xmlHashTable
Modified: lxml/trunk/src/lxml/xmlparser.pxd
==============================================================================
--- lxml/trunk/src/lxml/xmlparser.pxd (original)
+++ lxml/trunk/src/lxml/xmlparser.pxd Mon May 8 16:45:47 2006
@@ -23,6 +23,7 @@
xmlDict* dict
void* _private
int wellFormed
+ int recovery
int options
xmlError lastError
@@ -66,6 +67,9 @@
xmlInputCloseCallback ioclose,
void* ioctx,
char* URL, char* encoding, int options)
+ cdef xmlDoc* xmlCtxtReadMemory(xmlParserCtxt* ctxt,
+ char* buffer, int size,
+ char* filename, char* encoding, int options)
# entity loaders:
From scoder at codespeak.net Mon May 8 17:46:30 2006
From: scoder at codespeak.net (scoder@codespeak.net)
Date: Mon May 8 17:46:31 2006
Subject: [Lxml-checkins] r26970 - lxml/trunk
Message-ID: <20060508154630.41CFF1006D@code0.codespeak.net>
Author: scoder
Date: Mon May 8 17:46:29 2006
New Revision: 26970
Modified:
lxml/trunk/bench.py
Log:
make benchmark output more readable
Modified: lxml/trunk/bench.py
==============================================================================
--- lxml/trunk/bench.py (original)
+++ lxml/trunk/bench.py Mon May 8 17:46:29 2006
@@ -619,9 +619,10 @@
result = run_bench(bench, *benchmark_setup)
+ print "%9.4f msec/pass, best of (" % min(result),
for t in result:
print "%9.4f" % t,
- print "msec/pass, best: %9.4f" % min(result)
+ print ")"
if len(benchmark_suites) > 1:
print # empty line between different benchmarks
From scoder at codespeak.net Mon May 8 18:15:10 2006
From: scoder at codespeak.net (scoder@codespeak.net)
Date: Mon May 8 18:15:12 2006
Subject: [Lxml-checkins] r26973 - lxml/trunk/src/lxml
Message-ID: <20060508161510.015C11006E@code0.codespeak.net>
Author: scoder
Date: Mon May 8 18:15:09 2006
New Revision: 26973
Modified:
lxml/trunk/src/lxml/parser.pxi
Log:
cleanup
Modified: lxml/trunk/src/lxml/parser.pxi
==============================================================================
--- lxml/trunk/src/lxml/parser.pxi (original)
+++ lxml/trunk/src/lxml/parser.pxi Mon May 8 18:15:09 2006
@@ -62,14 +62,14 @@
__GLOBAL_PARSER_CONTEXT = _ParserContext()
############################################################
-## support for reading Python unicode
+## support for Python unicode I/O
############################################################
-# can libxml2 read plain Python unicode data?
+# name of Python unicode encoding as known to libxml2
cdef char* _UNICODE_ENCODING
_UNICODE_ENCODING = NULL
-cdef void _setupUnicodeParser():
+cdef void _setupPythonUnicode():
"""Sets _READ_UNICODE to 1 if libxml2 supports reading native Python
unicode. This depends on iconv, so we simply check if we find a matching
encoding handler.
@@ -100,7 +100,7 @@
else:
return tree.xmlGetCharEncodingName(enc)
-_setupUnicodeParser()
+_setupPythonUnicode()
############################################################
## support for file-like objects
From scoder at codespeak.net Mon May 8 18:49:00 2006
From: scoder at codespeak.net (scoder@codespeak.net)
Date: Mon May 8 18:49:01 2006
Subject: [Lxml-checkins] r26974 - in lxml/trunk: . src/lxml src/lxml/tests
Message-ID: <20060508164900.1DD881006E@code0.codespeak.net>
Author: scoder
Date: Mon May 8 18:48:58 2006
New Revision: 26974
Modified:
lxml/trunk/CHANGES.txt
lxml/trunk/src/lxml/etree.pyx
lxml/trunk/src/lxml/tests/test_etree.py
lxml/trunk/src/lxml/tree.pxd
Log:
module level 'tounicode' function to return Python unicode string
Modified: lxml/trunk/CHANGES.txt
==============================================================================
--- lxml/trunk/CHANGES.txt (original)
+++ lxml/trunk/CHANGES.txt Mon May 8 18:48:58 2006
@@ -7,6 +7,9 @@
Features added
--------------
+* Module level `tounicode` function to return XML serialization as Python
+ unicode string (equavalent to `tostring` function)
+
* Parsing a unicode string no longer copies the string (reduced memory
footprint)
Modified: lxml/trunk/src/lxml/etree.pyx
==============================================================================
--- lxml/trunk/src/lxml/etree.pyx (original)
+++ lxml/trunk/src/lxml/etree.pyx Mon May 8 18:48:58 2006
@@ -1409,15 +1409,42 @@
# encoding during output
enchandler = tree.xmlFindCharEncodingHandler(enc)
c_buffer = tree.xmlAllocOutputBuffer(enchandler)
- tree.xmlNodeDumpOutput(c_buffer, doc._c_doc, element._c_node, 0, 0,
- enc)
- _dumpNextNode(c_buffer, doc._c_doc, element._c_node, enc)
- tree.xmlOutputBufferFlush(c_buffer)
- if c_buffer.conv is not NULL:
- result = tree.xmlBufferContent(c_buffer.conv)
- else:
- result = tree.xmlBufferContent(c_buffer.buffer)
- tree.xmlOutputBufferClose(c_buffer)
+ try:
+ tree.xmlNodeDumpOutput(c_buffer, doc._c_doc, element._c_node, 0, 0, enc)
+ _dumpNextNode(c_buffer, doc._c_doc, element._c_node, enc)
+ tree.xmlOutputBufferFlush(c_buffer)
+ if c_buffer.conv is not NULL:
+ result = tree.xmlBufferContent(c_buffer.conv)
+ else:
+ result = tree.xmlBufferContent(c_buffer.buffer)
+ finally:
+ tree.xmlOutputBufferClose(c_buffer)
+ return result
+
+def tounicode(_NodeBase element):
+ cdef _Document doc
+ cdef tree.xmlOutputBuffer* c_buffer
+ cdef tree.xmlBuffer* c_result_buffer
+
+ assert element is not None
+ # better, but not ET compatible : "_NodeBase element not None"
+
+ doc = element._doc
+ c_buffer = tree.xmlAllocOutputBuffer(NULL)
+ try:
+ tree.xmlNodeDumpOutput(c_buffer, doc._c_doc, element._c_node, 0, 0, NULL)
+ _dumpNextNode(c_buffer, doc._c_doc, element._c_node, NULL)
+ tree.xmlOutputBufferFlush(c_buffer)
+ if c_buffer.conv is not NULL:
+ c_result_buffer = c_buffer.conv
+ else:
+ c_result_buffer = c_buffer.buffer
+ result = python.PyUnicode_DecodeUTF8(
+ tree.xmlBufferContent(c_result_buffer),
+ tree.xmlBufferLength(c_result_buffer),
+ 'strict')
+ finally:
+ tree.xmlOutputBufferClose(c_buffer)
return result
def parse(source, parser=None):
Modified: lxml/trunk/src/lxml/tests/test_etree.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_etree.py (original)
+++ lxml/trunk/src/lxml/tests/test_etree.py Mon May 8 18:48:58 2006
@@ -425,6 +425,54 @@
self.assertEquals(docinfo.root_name, 'html')
self.assertEquals(docinfo.doctype, '')
+ def test_tounicode(self):
+ tounicode = self.etree.tounicode
+ Element = self.etree.Element
+ SubElement = self.etree.SubElement
+
+ a = Element('a')
+ b = SubElement(a, 'b')
+ c = SubElement(a, 'c')
+
+ self.assert_(isinstance(tounicode(a), unicode))
+ self.assertEquals(' ',
+ canonicalize(tounicode(a)))
+
+ def test_tounicode_element(self):
+ tounicode = self.etree.tounicode
+ Element = self.etree.Element
+ SubElement = self.etree.SubElement
+
+ a = Element('a')
+ b = SubElement(a, 'b')
+ c = SubElement(a, 'c')
+ d = SubElement(c, 'd')
+ self.assert_(isinstance(tounicode(b), unicode))
+ self.assert_(isinstance(tounicode(c), unicode))
+ self.assertEquals(' ',
+ canonicalize(tounicode(b)))
+ self.assertEquals(' ',
+ canonicalize(tounicode(c)))
+
+ def test_tounicode_none(self):
+ tounicode = self.etree.tounicode
+ self.assertRaises(AssertionError, self.etree.tounicode, None)
+
+ def test_tounicode_element_tail(self):
+ tounicode = self.etree.tounicode
+ Element = self.etree.Element
+ SubElement = self.etree.SubElement
+
+ a = Element('a')
+ b = SubElement(a, 'b')
+ c = SubElement(a, 'c')
+ d = SubElement(c, 'd')
+ b.tail = 'Foo'
+
+ self.assert_(isinstance(tounicode(b), unicode))
+ self.assert_(tounicode(b) == ' Foo' or
+ tounicode(b) == ' Foo')
+
def _writeElement(self, element, encoding='us-ascii'):
"""Write out element for comparison.
"""
Modified: lxml/trunk/src/lxml/tree.pxd
==============================================================================
--- lxml/trunk/src/lxml/tree.pxd (original)
+++ lxml/trunk/src/lxml/tree.pxd Mon May 8 18:48:58 2006
@@ -199,6 +199,7 @@
cdef int xmlReconciliateNs(xmlDoc* doc, xmlNode* tree)
cdef xmlBuffer* xmlBufferCreate()
cdef char* xmlBufferContent(xmlBuffer* buf)
+ cdef int xmlBufferLength(xmlBuffer* buf)
cdef extern from "libxml/xmlIO.h":
cdef xmlOutputBuffer* xmlAllocOutputBuffer(xmlCharEncodingHandler* encoder)
From scoder at codespeak.net Mon May 8 19:05:02 2006
From: scoder at codespeak.net (scoder@codespeak.net)
Date: Mon May 8 19:05:03 2006
Subject: [Lxml-checkins] r26975 - lxml/trunk
Message-ID: <20060508170502.3FCEF1006E@code0.codespeak.net>
Author: scoder
Date: Mon May 8 19:05:01 2006
New Revision: 26975
Modified:
lxml/trunk/CHANGES.txt
Log:
typo
Modified: lxml/trunk/CHANGES.txt
==============================================================================
--- lxml/trunk/CHANGES.txt (original)
+++ lxml/trunk/CHANGES.txt Mon May 8 19:05:01 2006
@@ -8,7 +8,7 @@
--------------
* Module level `tounicode` function to return XML serialization as Python
- unicode string (equavalent to `tostring` function)
+ unicode string (equivalent to `tostring` function)
* Parsing a unicode string no longer copies the string (reduced memory
footprint)
From scoder at codespeak.net Mon May 8 19:35:05 2006
From: scoder at codespeak.net (scoder@codespeak.net)
Date: Mon May 8 19:35:06 2006
Subject: [Lxml-checkins] r26977 - lxml/trunk/src/lxml
Message-ID: <20060508173505.6C0B910070@code0.codespeak.net>
Author: scoder
Date: Mon May 8 19:35:03 2006
New Revision: 26977
Modified:
lxml/trunk/src/lxml/etree.pyx
Log:
potential bug in string conversion (if it's not a string)
Modified: lxml/trunk/src/lxml/etree.pyx
==============================================================================
--- lxml/trunk/src/lxml/etree.pyx (original)
+++ lxml/trunk/src/lxml/etree.pyx Mon May 8 19:35:03 2006
@@ -1404,7 +1404,7 @@
if encoding in ('utf8', 'UTF8', 'utf-8'):
encoding = 'UTF-8'
doc = element._doc
- enc = _cstr(encoding)
+ enc = encoding
# it is necessary to *both* find the encoding handler *and* use
# encoding during output
enchandler = tree.xmlFindCharEncodingHandler(enc)
From scoder at codespeak.net Mon May 8 21:22:58 2006
From: scoder at codespeak.net (scoder@codespeak.net)
Date: Mon May 8 21:23:00 2006
Subject: [Lxml-checkins] r26978 - lxml/trunk/doc
Message-ID: <20060508192258.B98051006E@code0.codespeak.net>
Author: scoder
Date: Mon May 8 21:22:57 2006
New Revision: 26978
Modified:
lxml/trunk/doc/compatibility.txt
Log:
note on differences in unicode string parsing between etree and ElementTree
Modified: lxml/trunk/doc/compatibility.txt
==============================================================================
--- lxml/trunk/doc/compatibility.txt (original)
+++ lxml/trunk/doc/compatibility.txt Mon May 8 21:22:57 2006
@@ -32,6 +32,22 @@
XPath, XSLT, Relax NG, and XML Schema support, which (c)ElementTree
does not offer.
+* etree has a different idea about Python unicode strings than ElementTree.
+ In most parts of the API, ElementTree uses plain strings and unicode strings
+ as what they are. This includes Element.text, Element.tail and many other
+ places. However, the ElementTree parsers assume by default that any string
+ (`str` or `unicode`) contains ASCII data and raise an exception if strings
+ do not match the expected encoding.
+
+ etree has the same idea about plain strings (`str`) as ElementTree. For
+ unicode strings, however, etree assumes throughout the API that they are
+ Python unicode encoded strings rather than byte data. This includes the
+ parsers. It is therefore perfectly correct to pass XML unicode data into
+ the etree parsers in the form of Python unicode strings. It is an error, on the
+ other hand, if unicode strings specify an encoding in their XML declaration.
+ Note also that Python unicode strings are platform specific. Such an
+ encoding specifier would not be portable.
+
* ElementTree allows you to place an Element in two different trees at the
same time. Thus, this::
From scoder at codespeak.net Tue May 9 11:15:32 2006
From: scoder at codespeak.net (scoder@codespeak.net)
Date: Tue May 9 11:15:33 2006
Subject: [Lxml-checkins] r26990 - lxml/trunk/src/lxml
Message-ID: <20060509091532.4863610071@code0.codespeak.net>
Author: scoder
Date: Tue May 9 11:15:30 2006
New Revision: 26990
Modified:
lxml/trunk/src/lxml/parser.pxi
Log:
doc updates
Modified: lxml/trunk/src/lxml/parser.pxi
==============================================================================
--- lxml/trunk/src/lxml/parser.pxi (original)
+++ lxml/trunk/src/lxml/parser.pxi Tue May 9 11:15:30 2006
@@ -70,9 +70,10 @@
_UNICODE_ENCODING = NULL
cdef void _setupPythonUnicode():
- """Sets _READ_UNICODE to 1 if libxml2 supports reading native Python
- unicode. This depends on iconv, so we simply check if we find a matching
- encoding handler.
+ """Sets _UNICODE_ENCODING to the internal encoding name of Python unicode
+ strings if libxml2 supports reading native Python unicode. This depends
+ on iconv and the local Python installation, so we simply check if we find
+ a matching encoding handler.
"""
cdef Py_ssize_t l
cdef char* buffer
From scoder at codespeak.net Tue May 9 12:13:22 2006
From: scoder at codespeak.net (scoder@codespeak.net)
Date: Tue May 9 12:13:24 2006
Subject: [Lxml-checkins] r26992 - lxml/trunk
Message-ID: <20060509101322.71B6010071@code0.codespeak.net>
Author: scoder
Date: Tue May 9 12:13:21 2006
New Revision: 26992
Modified:
lxml/trunk/CHANGES.txt
Log:
doc updates
Modified: lxml/trunk/CHANGES.txt
==============================================================================
--- lxml/trunk/CHANGES.txt (original)
+++ lxml/trunk/CHANGES.txt Tue May 9 12:13:21 2006
@@ -25,6 +25,8 @@
* etree module can be compiled without libxslt by commenting out the line
'include "xslt.pxi"' near the end of the etree.pyx source file
+* Better error messages in parser exceptions
+
* Error reporting now also works in XSLT
* Support for custom document loaders (URI resolvers) in parsers and XSLT,
From scoder at codespeak.net Wed May 10 09:30:55 2006
From: scoder at codespeak.net (scoder@codespeak.net)
Date: Wed May 10 09:30:57 2006
Subject: [Lxml-checkins] r27024 - in lxml/trunk: . doc src/lxml
src/lxml/tests
Message-ID: <20060510073055.ECBAF10089@code0.codespeak.net>
Author: scoder
Date: Wed May 10 09:30:52 2006
New Revision: 27024
Modified:
lxml/trunk/CHANGES.txt
lxml/trunk/doc/api.txt
lxml/trunk/src/lxml/apihelpers.pxi
lxml/trunk/src/lxml/python.pxd
lxml/trunk/src/lxml/tests/test_xslt.py
lxml/trunk/src/lxml/xslt.pxd
lxml/trunk/src/lxml/xslt.pxi
Log:
fix str() decoding bug in _XSLTResultTree.__str__ for non-UTF8 encodings, make unicode() work on _XSLTResultTree
Modified: lxml/trunk/CHANGES.txt
==============================================================================
--- lxml/trunk/CHANGES.txt (original)
+++ lxml/trunk/CHANGES.txt Wed May 10 09:30:52 2006
@@ -7,6 +7,8 @@
Features added
--------------
+* Support for writing XSLT results to Python unicode strings via `unicode()`
+
* Module level `tounicode` function to return XML serialization as Python
unicode string (equivalent to `tostring` function)
@@ -56,6 +58,8 @@
Bugs fixed
----------
+* str(xslt_result) was broken for output other than UTF-8
+
* Memory leak if write_c14n fails to write the file after conversion
* ElementTree.xpath() and XPathDocumentEvaluator were not using the
Modified: lxml/trunk/doc/api.txt
==============================================================================
--- lxml/trunk/doc/api.txt (original)
+++ lxml/trunk/doc/api.txt Wed May 10 09:30:52 2006
@@ -187,7 +187,6 @@
>>> f = StringIO('''\
...
- ...
...
...
...
@@ -213,6 +212,33 @@
>>> str(result)
'\nText \n'
+The result is always a plain string, encoded as requested by the `xsl:output`
+element in the stylesheet. If you want a Python unicode string instead, you
+should set this encoding to `UTF-8` (or leave it as the `ASCII` default).
+This allows you to call the `unicode()` function on the result::
+
+ >>> unicode(result)
+ u'\nText \n'
+
+However, encodings that are not supported by Python will result in an error::
+
+ >>> f = StringIO('''\
+ ...
+ ...
+ ...
+ ...
+ ...
+ ... ''')
+ >>> xslt_doc = lxml.etree.parse(f)
+ >>> transform = lxml.etree.XSLT(xslt_doc)
+
+ >>> result = transform(doc)
+ >>> unicode(result)
+ Traceback (most recent call last):
+ [...]
+ LookupError: unknown encoding: UCS4
+
It is possible to pass parameters, in the form of XPath expressions, to the
XSLT template::
Modified: lxml/trunk/src/lxml/apihelpers.pxi
==============================================================================
--- lxml/trunk/src/lxml/apihelpers.pxi (original)
+++ lxml/trunk/src/lxml/apihelpers.pxi Wed May 10 09:30:52 2006
@@ -121,6 +121,14 @@
if c_next is not NULL and c_next.type == tree.XML_TEXT_NODE:
tree.xmlNodeDumpOutput(c_buffer, c_doc, c_next, 0, 0, encoding)
+cdef object __REPLACE_XML_ENCODING
+__REPLACE_XML_ENCODING = re.compile(
+ r'^(\s*<\?\s*xml[^>]+)\s+encoding\s*=\s*"[^"]*"\s*', re.U).sub
+
+cdef object _stripEncodingDeclaration(object xml_string):
+ # this is a hack to remove the XML encoding declaration from unicode
+ return __REPLACE_XML_ENCODING(r'\g<1>', xml_string)
+
cdef object _stripDeclaration(object xml_string):
# this is a hack to remove the XML declaration when we encode to UTF-8
xml_string = xml_string.strip()
Modified: lxml/trunk/src/lxml/python.pxd
==============================================================================
--- lxml/trunk/src/lxml/python.pxd (original)
+++ lxml/trunk/src/lxml/python.pxd Wed May 10 09:30:52 2006
@@ -15,6 +15,8 @@
cdef object PyUnicode_FromEncodedObject(object s, char* encoding,
char* errors)
+ cdef object PyUnicode_Decode(char* s, Py_ssize_t size,
+ char* encoding, char* errors)
cdef object PyUnicode_DecodeUTF8(char* s, Py_ssize_t size, char* errors)
cdef object PyUnicode_AsUTF8String(object ustring)
cdef char* PyUnicode_AS_DATA(object ustring)
Modified: lxml/trunk/src/lxml/tests/test_xslt.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_xslt.py (original)
+++ lxml/trunk/src/lxml/tests/test_xslt.py Wed May 10 09:30:52 2006
@@ -30,6 +30,66 @@
''',
st.tostring(res))
+ def test_xslt_utf8(self):
+ tree = self.parse(u'\uF8D2 \uF8D2 ')
+ style = self.parse('''\
+
+
+
+
+
+ ''')
+
+ st = etree.XSLT(style)
+ res = st.apply(tree)
+ expected = u'''\
+
+\uF8D2
+'''
+ self.assertEquals(expected,
+ unicode(str(res), 'UTF-8'))
+
+ def test_xslt_encoding(self):
+ tree = self.parse(u'\uF8D2 \uF8D2 ')
+ style = self.parse('''\
+
+
+
+
+
+ ''')
+
+ st = etree.XSLT(style)
+ res = st.apply(tree)
+ expected = u'''\
+
+\uF8D2
+'''
+ self.assertEquals(expected,
+ unicode(str(res), 'UTF-16'))
+
+ def test_xslt_unicode(self):
+ tree = self.parse(u'\uF8D2 \uF8D2 ')
+ style = self.parse('''\
+
+
+
+
+
+ ''')
+
+ st = etree.XSLT(style)
+ res = st.apply(tree)
+ expected = u'''\
+
+\uF8D2
+'''
+ self.assertEquals(expected,
+ unicode(res))
+
def test_exslt(self):
tree = self.parse('B C ')
style = self.parse('''\
@@ -250,10 +310,11 @@
etree.tostring(result.getroot())
result = transform.apply(source)
etree.tostring(result.getroot())
-
- result = transform(source)
- result = transform(source)
str(result)
+
+ result1 = transform(source)
+ result2 = transform(source)
+ self.assertEquals(str(result1), str(result2))
result = transform(source)
str(result)
Modified: lxml/trunk/src/lxml/xslt.pxd
==============================================================================
--- lxml/trunk/src/lxml/xslt.pxd (original)
+++ lxml/trunk/src/lxml/xslt.pxd Wed May 10 09:30:52 2006
@@ -12,6 +12,7 @@
xmlDoc* doc
ctypedef struct xsltStylesheet:
+ char* encoding
xmlDoc* doc
ctypedef struct xsltTransformContext:
Modified: lxml/trunk/src/lxml/xslt.pxi
==============================================================================
--- lxml/trunk/src/lxml/xslt.pxi (original)
+++ lxml/trunk/src/lxml/xslt.pxi Wed May 10 09:30:52 2006
@@ -315,20 +315,38 @@
cdef class _XSLTResultTree(_ElementTree):
cdef XSLT _xslt
- def __str__(self):
- cdef char* s
- cdef int l
+ cdef _saveToStringAndSize(self, char** s, int* l):
cdef int r
- r = xslt.xsltSaveResultToString(&s, &l, self._doc._c_doc,
+ r = xslt.xsltSaveResultToString(s, l, self._doc._c_doc,
self._xslt._c_style)
if r == -1:
raise XSLTSaveError, "Error saving XSLT result to string"
+
+ def __str__(self):
+ cdef char* s
+ cdef int l
+ self._saveToStringAndSize(&s, &l)
if s is NULL:
return ''
- result = funicode(s)
+ # we must not use 'funicode' here as this is not always UTF-8
+ result = python.PyString_FromStringAndSize(s, l)
tree.xmlFree(s)
return result
+ def __unicode__(self):
+ cdef char* encoding
+ cdef char* s
+ cdef int l
+ self._saveToStringAndSize(&s, &l)
+ if s is NULL:
+ return unicode()
+ encoding = self._xslt._c_style.encoding
+ if encoding is NULL:
+ encoding = 'ascii'
+ result = python.PyUnicode_Decode(s, l, encoding, 'strict')
+ tree.xmlFree(s)
+ return _stripEncodingDeclaration(result)
+
cdef _xsltResultTreeFactory(_Document doc, XSLT xslt):
cdef _XSLTResultTree result
result = <_XSLTResultTree>_newElementTree(doc, None, _XSLTResultTree)
From scoder at codespeak.net Wed May 10 10:38:25 2006
From: scoder at codespeak.net (scoder@codespeak.net)
Date: Wed May 10 10:38:27 2006
Subject: [Lxml-checkins] r27028 - lxml/trunk/src/lxml
Message-ID: <20060510083825.BB09710085@code0.codespeak.net>
Author: scoder
Date: Wed May 10 10:38:24 2006
New Revision: 27028
Modified:
lxml/trunk/src/lxml/parser.pxi
Log:
cleanup
Modified: lxml/trunk/src/lxml/parser.pxi
==============================================================================
--- lxml/trunk/src/lxml/parser.pxi (original)
+++ lxml/trunk/src/lxml/parser.pxi Wed May 10 10:38:24 2006
@@ -377,12 +377,13 @@
ctxt.lastError.domain == xmlerror.XML_FROM_IO:
if ctxt.lastError.message is not NULL:
message = "Error reading file %s: %s" % (
- funicode(c_filename), funicode(ctxt.lastError.message))
+ funicode(c_filename),
+ funicode(ctxt.lastError.message).strip())
else:
message = "Error reading file %s" % funicode(c_filename)
raise IOError, message
elif ctxt.lastError.message is not NULL:
- raise XMLSyntaxError, funicode(ctxt.lastError.message)
+ raise XMLSyntaxError, funicode(ctxt.lastError.message).strip()
else:
raise XMLSyntaxError
@@ -652,7 +653,7 @@
if python.PyUnicode_Check(text):
# pass native unicode only if libxml2 can handle it
if _UNICODE_ENCODING is NULL:
- text = _stripDeclaration(_utf8(text))
+ text = _stripEncodingDeclaration(_utf8(text))
else:
text = _utf8(text)
if url is not None:
From scoder at codespeak.net Wed May 10 10:49:18 2006
From: scoder at codespeak.net (scoder@codespeak.net)
Date: Wed May 10 10:49:19 2006
Subject: [Lxml-checkins] r27029 - in lxml/trunk: . doc src/lxml
src/lxml/tests
Message-ID: <20060510084918.032FE10087@code0.codespeak.net>
Author: scoder
Date: Wed May 10 10:49:16 2006
New Revision: 27029
Modified:
lxml/trunk/CHANGES.txt
lxml/trunk/doc/api.txt
lxml/trunk/src/lxml/etree.pyx
lxml/trunk/src/lxml/tests/test_etree.py
Log:
add __unicode__ method to _NodeBase and _ElementTree to let them return an XML unicode string of their tree on unicode() calls; cleanup and new doctests in api.txt
Modified: lxml/trunk/CHANGES.txt
==============================================================================
--- lxml/trunk/CHANGES.txt (original)
+++ lxml/trunk/CHANGES.txt Wed May 10 10:49:16 2006
@@ -7,10 +7,13 @@
Features added
--------------
-* Support for writing XSLT results to Python unicode strings via `unicode()`
+* Support for writing the XML representation of Elements and ElementTrees to
+ Python unicode strings via ``unicode()``
+
+* Support for writing XSLT results to Python unicode strings via ``unicode()``
* Module level `tounicode` function to return XML serialization as Python
- unicode string (equivalent to `tostring` function)
+ unicode string (equivalent to ``tostring`` function)
* Parsing a unicode string no longer copies the string (reduced memory
footprint)
@@ -25,7 +28,7 @@
information, original encoding and XML version as seen by the parser
* etree module can be compiled without libxslt by commenting out the line
- 'include "xslt.pxi"' near the end of the etree.pyx source file
+ ``include "xslt.pxi"`` near the end of the etree.pyx source file
* Better error messages in parser exceptions
Modified: lxml/trunk/doc/api.txt
==============================================================================
--- lxml/trunk/doc/api.txt (original)
+++ lxml/trunk/doc/api.txt Wed May 10 10:49:16 2006
@@ -16,7 +16,7 @@
The following examples usually assume this to be executed first::
- >>> import lxml.etree
+ >>> from lxml import etree
>>> from StringIO import StringIO
If you need to know which version of lxml is installed, you can access the
@@ -37,13 +37,13 @@
>>> xml = ' '
- >>> et = lxml.etree.parse(StringIO(xml))
- >>> print lxml.etree.tostring(et.getroot())
+ >>> et = etree.parse(StringIO(xml))
+ >>> print etree.tostring(et.getroot())
- >>> parser = lxml.etree.XMLParser(ns_clean=True)
- >>> et = lxml.etree.parse(StringIO(xml), parser)
- >>> print lxml.etree.tostring(et.getroot())
+ >>> parser = etree.XMLParser(ns_clean=True)
+ >>> et = etree.parse(StringIO(xml), parser)
+ >>> print etree.tostring(et.getroot())
HTML parsing is similarly simple. The parsers have a ``recover`` keyword
@@ -54,17 +54,17 @@
>>> broken_html = "test ''')
- >>> xslt_doc = lxml.etree.parse(f)
- >>> transform = lxml.etree.XSLT(xslt_doc)
+ >>> xslt_doc = etree.parse(f)
+ >>> transform = etree.XSLT(xslt_doc)
You can then run the transformation on an ElementTree document by simply
calling it, and this results in another ElementTree object::
>>> f = StringIO('Text ')
- >>> doc = lxml.etree.parse(f)
+ >>> doc = etree.parse(f)
>>> result = transform(doc)
The result object can be accessed like a normal ElementTree document::
@@ -230,8 +279,8 @@
...
...
... ''')
- >>> xslt_doc = lxml.etree.parse(f)
- >>> transform = lxml.etree.XSLT(xslt_doc)
+ >>> xslt_doc = etree.parse(f)
+ >>> transform = etree.XSLT(xslt_doc)
>>> result = transform(doc)
>>> unicode(result)
@@ -250,10 +299,10 @@
...
...
... ''')
- >>> xslt_doc = lxml.etree.parse(f)
- >>> transform = lxml.etree.XSLT(xslt_doc)
+ >>> xslt_doc = etree.parse(f)
+ >>> transform = etree.XSLT(xslt_doc)
>>> f = StringIO('Text ')
- >>> doc = lxml.etree.parse(f)
+ >>> doc = etree.parse(f)
The parameters are passed as keyword parameters to the transform call. First
let's try passing in a simple string expression::
@@ -293,20 +342,20 @@
...
...
... ''')
- >>> relaxng_doc = lxml.etree.parse(f)
- >>> relaxng = lxml.etree.RelaxNG(relaxng_doc)
+ >>> relaxng_doc = etree.parse(f)
+ >>> relaxng = etree.RelaxNG(relaxng_doc)
You can then validate some ElementTree document against the schema. You'll get
back True if the document is valid against the Relax NG schema, and False if
not::
>>> valid = StringIO(' ')
- >>> doc = lxml.etree.parse(valid)
+ >>> doc = etree.parse(valid)
>>> relaxng.validate(doc)
1
>>> invalid = StringIO(' ')
- >>> doc2 = lxml.etree.parse(invalid)
+ >>> doc2 = etree.parse(invalid)
>>> relaxng.validate(doc2)
0
@@ -314,7 +363,7 @@
method. This is sometimes used in conditional statements::
>>> invalid = StringIO(' ')
- >>> doc2 = lxml.etree.parse(invalid)
+ >>> doc2 = etree.parse(invalid)
>>> if not relaxng(doc2):
... print "invalid!"
invalid!
@@ -375,20 +424,20 @@
...
...
... ''')
- >>> xmlschema_doc = lxml.etree.parse(f)
- >>> xmlschema = lxml.etree.XMLSchema(xmlschema_doc)
+ >>> xmlschema_doc = etree.parse(f)
+ >>> xmlschema = etree.XMLSchema(xmlschema_doc)
You can then validate some ElementTree document with this. Like with
RelaxNG, you'll get back true if the document is valid against the XML
schema, and false if not::
>>> valid = StringIO(' ')
- >>> doc = lxml.etree.parse(valid)
+ >>> doc = etree.parse(valid)
>>> xmlschema.validate(doc)
1
>>> invalid = StringIO(' ')
- >>> doc2 = lxml.etree.parse(invalid)
+ >>> doc2 = etree.parse(invalid)
>>> xmlschema.validate(doc2)
0
@@ -396,7 +445,7 @@
method. This is sometimes used in conditional statements::
>>> invalid = StringIO(' ')
- >>> doc2 = lxml.etree.parse(invalid)
+ >>> doc2 = etree.parse(invalid)
>>> if not xmlschema(doc2):
... print "invalid!"
invalid!
@@ -448,9 +497,9 @@
...
... ''')
- >>> tree = lxml.etree.parse(data)
+ >>> tree = etree.parse(data)
>>> tree.xinclude()
- >>> lxml.etree.tostring(tree.getroot())
+ >>> etree.tostring(tree.getroot())
'\n \n \n '
@@ -463,7 +512,7 @@
C14N recommendation. For example::
>>> f = StringIO(' ')
- >>> tree = lxml.etree.parse(f)
+ >>> tree = etree.parse(f)
>>> f2 = StringIO()
>>> tree.write_c14n(f2)
>>> f2.getvalue()
Modified: lxml/trunk/src/lxml/etree.pyx
==============================================================================
--- lxml/trunk/src/lxml/etree.pyx (original)
+++ lxml/trunk/src/lxml/etree.pyx Wed May 10 10:49:16 2006
@@ -319,6 +319,9 @@
unregisterProxy(self)
attemptDeallocation(self._c_node)
+ def __unicode__(self):
+ return tounicode(self)
+
def _init(self):
"""Called after object initialisation. Subclasses may override
this if they recursively call _init() in the superclasses.
@@ -387,6 +390,9 @@
return root.findall(path)
# extensions to ElementTree API
+ def __unicode__(self):
+ return tounicode(self._context_node)
+
def xpath(self, _path, namespaces=None, **_variables):
"""XPath evaluate in context of document.
Modified: lxml/trunk/src/lxml/tests/test_etree.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_etree.py (original)
+++ lxml/trunk/src/lxml/tests/test_etree.py Wed May 10 10:49:16 2006
@@ -457,7 +457,7 @@
def test_tounicode_none(self):
tounicode = self.etree.tounicode
self.assertRaises(AssertionError, self.etree.tounicode, None)
-
+
def test_tounicode_element_tail(self):
tounicode = self.etree.tounicode
Element = self.etree.Element
@@ -472,7 +472,68 @@
self.assert_(isinstance(tounicode(b), unicode))
self.assert_(tounicode(b) == ' Foo' or
tounicode(b) == ' Foo')
+
+ def test_unicode(self):
+ Element = self.etree.Element
+ SubElement = self.etree.SubElement
+
+ a = Element('a')
+ b = SubElement(a, 'b')
+ c = SubElement(a, 'c')
+
+ self.assert_(isinstance(unicode(a), unicode))
+ self.assertEquals(' ',
+ canonicalize(unicode(a)))
+
+ def test_unicode_element(self):
+ Element = self.etree.Element
+ SubElement = self.etree.SubElement
+
+ a = Element('a')
+ b = SubElement(a, 'b')
+ c = SubElement(a, 'c')
+ d = SubElement(c, 'd')
+ self.assert_(isinstance(unicode(b), unicode))
+ self.assert_(isinstance(unicode(c), unicode))
+ self.assertEquals(' ',
+ canonicalize(unicode(b)))
+ self.assertEquals(' ',
+ canonicalize(unicode(c)))
+
+ def test_unicode_elementtree(self):
+ ElementTree = self.etree.ElementTree
+ Element = self.etree.Element
+ SubElement = self.etree.SubElement
+
+ a = Element('a')
+ b = SubElement(a, 'b')
+ c = SubElement(a, 'c')
+ d = SubElement(c, 'd')
+
+ t = ElementTree(b)
+ self.assert_(isinstance(unicode(t), unicode))
+ self.assertEquals(' ',
+ canonicalize(unicode(t)))
+
+ t = ElementTree(c)
+ self.assert_(isinstance(unicode(t), unicode))
+ self.assertEquals(' ',
+ canonicalize(unicode(t)))
+
+ def test_tounicode_element_tail(self):
+ Element = self.etree.Element
+ SubElement = self.etree.SubElement
+ a = Element('a')
+ b = SubElement(a, 'b')
+ c = SubElement(a, 'c')
+ d = SubElement(c, 'd')
+ b.tail = 'Foo'
+
+ self.assert_(isinstance(unicode(b), unicode))
+ self.assert_(unicode(b) == ' Foo' or
+ unicode(b) == ' Foo')
+
def _writeElement(self, element, encoding='us-ascii'):
"""Write out element for comparison.
"""
From scoder at codespeak.net Wed May 10 10:52:40 2006
From: scoder at codespeak.net (scoder@codespeak.net)
Date: Wed May 10 10:52:40 2006
Subject: [Lxml-checkins] r27030 - lxml/trunk/doc
Message-ID: <20060510085240.224591008B@code0.codespeak.net>
Author: scoder
Date: Wed May 10 10:52:38 2006
New Revision: 27030
Modified:
lxml/trunk/doc/api.txt
Log:
api.txt: be more specific on unicode parser difference between etree and ElementTree
Modified: lxml/trunk/doc/api.txt
==============================================================================
--- lxml/trunk/doc/api.txt (original)
+++ lxml/trunk/doc/api.txt Wed May 10 10:52:38 2006
@@ -126,7 +126,8 @@
----------------------
lxml.etree has broader support for Python unicode strings than the ElementTree
-library. First of all, its parsers can handle unicode strings straight away::
+library. First of all, where ElementTree would raise an exception, the
+parsers in lxml.etree can handle unicode strings straight away::
>>> uxml = u' \uf8d1 + \uf8d2 '
>>> uxml
From scoder at codespeak.net Wed May 10 11:04:01 2006
From: scoder at codespeak.net (scoder@codespeak.net)
Date: Wed May 10 11:04:02 2006
Subject: [Lxml-checkins] r27031 - lxml/trunk/doc
Message-ID: <20060510090401.3148D1008F@code0.codespeak.net>
Author: scoder
Date: Wed May 10 11:04:00 2006
New Revision: 27031
Modified:
lxml/trunk/doc/api.txt
Log:
new doctest
Modified: lxml/trunk/doc/api.txt
==============================================================================
--- lxml/trunk/doc/api.txt (original)
+++ lxml/trunk/doc/api.txt Wed May 10 11:04:00 2006
@@ -156,6 +156,9 @@
On the output side, lxml.etree supports calling ``unicode()`` on XML tree
objects to retrieve a Python unicode representation::
+ >>> unicode(root)
+ u'<test> \uf8d1 + \uf8d2 </test>'
+
>>> el = etree.Element("test")
>>> unicode(el)
u'<test/>'
From scoder at codespeak.net Wed May 10 11:30:09 2006
From: scoder at codespeak.net (scoder@codespeak.net)
Date: Wed May 10 11:30:10 2006
Subject: [Lxml-checkins] r27034 - in lxml/trunk: . doc src/lxml
src/lxml/tests
Message-ID: <20060510093009.4632310092@code0.codespeak.net>
Author: scoder
Date: Wed May 10 11:30:06 2006
New Revision: 27034
Modified:
lxml/trunk/CHANGES.txt
lxml/trunk/doc/api.txt
lxml/trunk/src/lxml/apihelpers.pxi
lxml/trunk/src/lxml/etree.pyx
lxml/trunk/src/lxml/tests/test_etree.py
Log:
discarded new 'tounicode()' function again, replaced by standard unicode() call
Modified: lxml/trunk/CHANGES.txt
==============================================================================
--- lxml/trunk/CHANGES.txt (original)
+++ lxml/trunk/CHANGES.txt Wed May 10 11:30:06 2006
@@ -12,9 +12,6 @@
* Support for writing XSLT results to Python unicode strings via ``unicode()``
-* Module level `tounicode` function to return XML serialization as Python
- unicode string (equivalent to ``tostring`` function)
-
* Parsing a unicode string no longer copies the string (reduced memory
footprint)
@@ -61,7 +58,7 @@
Bugs fixed
----------
-* str(xslt_result) was broken for output other than UTF-8
+* str(xslt_result) was broken for XSLT output other than UTF-8
* Memory leak if write_c14n fails to write the file after conversion
Modified: lxml/trunk/doc/api.txt
==============================================================================
--- lxml/trunk/doc/api.txt (original)
+++ lxml/trunk/doc/api.txt Wed May 10 11:30:06 2006
@@ -143,18 +143,18 @@
... print "This is not well-formed XML!"
This is not well-formed XML!
-To serialize the result, you can either use the normal ``tostring`` module
-function or the new ``tounicode`` function, which is only available in
-lxml.etree and always returns a Python unicode string::
+To serialize the result, you would normally use the ``tostring`` module
+function, which serializes to plain ASCII by default or a number of other
+encodings if asked for::
>>> etree.tostring(root)
'<test> &#63697; + &#63698; </test>'
- >>> etree.tounicode(root)
- u'<test> \uf8d1 + \uf8d2 </test>'
+ >>> etree.tostring(root, 'UTF-8')
+ '<test> \xef\xa3\x91 + \xef\xa3\x92 </test>'
-On the output side, lxml.etree supports calling ``unicode()`` on XML tree
-objects to retrieve a Python unicode representation::
+As an extension, lxml.etree supports calling the builtin ``unicode()``
+function on XML tree objects to retrieve a Python unicode representation::
>>> unicode(root)
u'<test> \uf8d1 + \uf8d2 </test>'
@@ -164,6 +164,9 @@
u'<test/>'
>>> subel = etree.SubElement(el, "subtest")
+ >>> unicode(el)
+ u'<test><subtest/></test>'
+
>>> et = etree.ElementTree(el)
>>> unicode(et)
u'<test><subtest/></test>'
Modified: lxml/trunk/src/lxml/apihelpers.pxi
==============================================================================
--- lxml/trunk/src/lxml/apihelpers.pxi (original)
+++ lxml/trunk/src/lxml/apihelpers.pxi Wed May 10 11:30:06 2006
@@ -1,5 +1,60 @@
# Private helper functions
+cdef _tostring(_NodeBase element, encoding):
+ "Serialize an element to an encoded string representation of its XML tree."
+ cdef _Document doc
+ cdef tree.xmlOutputBuffer* c_buffer
+ cdef tree.xmlCharEncodingHandler* enchandler
+ cdef char* enc
+ if element is None:
+ return None
+ #if encoding is None:
+ # encoding = 'UTF-8'
+ if encoding in ('utf8', 'UTF8', 'utf-8'):
+ encoding = 'UTF-8'
+ doc = element._doc
+ enc = encoding
+ # it is necessary to *and* find the encoding handler *and* use
+ # encoding during output
+ enchandler = tree.xmlFindCharEncodingHandler(enc)
+ c_buffer = tree.xmlAllocOutputBuffer(enchandler)
+ try:
+ tree.xmlNodeDumpOutput(c_buffer, doc._c_doc, element._c_node, 0, 0, enc)
+ _dumpNextNode(c_buffer, doc._c_doc, element._c_node, enc)
+ tree.xmlOutputBufferFlush(c_buffer)
+ if c_buffer.conv is not NULL:
+ result = tree.xmlBufferContent(c_buffer.conv)
+ else:
+ result = tree.xmlBufferContent(c_buffer.buffer)
+ finally:
+ tree.xmlOutputBufferClose(c_buffer)
+ return result
+
+cdef _tounicode(_NodeBase element):
+ "Serialize an element to the Python unicode representation of its XML tree."
+ cdef _Document doc
+ cdef tree.xmlOutputBuffer* c_buffer
+ cdef tree.xmlBuffer* c_result_buffer
+ if element is None:
+ return None
+ doc = element._doc
+ c_buffer = tree.xmlAllocOutputBuffer(NULL)
+ try:
+ tree.xmlNodeDumpOutput(c_buffer, doc._c_doc, element._c_node, 0, 0, NULL)
+ _dumpNextNode(c_buffer, doc._c_doc, element._c_node, NULL)
+ tree.xmlOutputBufferFlush(c_buffer)
+ if c_buffer.conv is not NULL:
+ c_result_buffer = c_buffer.conv
+ else:
+ c_result_buffer = c_buffer.buffer
+ result = python.PyUnicode_DecodeUTF8(
+ tree.xmlBufferContent(c_result_buffer),
+ tree.xmlBufferLength(c_result_buffer),
+ 'strict')
+ finally:
+ tree.xmlOutputBufferClose(c_buffer)
+ return result
+
cdef void displayNode(xmlNode* c_node, indent):
# to help with debugging
cdef xmlNode* c_child
Modified: lxml/trunk/src/lxml/etree.pyx
==============================================================================
--- lxml/trunk/src/lxml/etree.pyx (original)
+++ lxml/trunk/src/lxml/etree.pyx Wed May 10 11:30:06 2006
@@ -320,7 +320,7 @@
attemptDeallocation(self._c_node)
def __unicode__(self):
- return tounicode(self)
+ return _tounicode(self)
def _init(self):
"""Called after object initialisation. Subclasses may override
@@ -391,7 +391,7 @@
# extensions to ElementTree API
def __unicode__(self):
- return tounicode(self._context_node)
+ return _tounicode(self._context_node)
def xpath(self, _path, namespaces=None, **_variables):
"""XPath evaluate in context of document.
@@ -1396,62 +1396,16 @@
# better, but not ET compatible : "_NodeBase elem not None"
_dumpToFile(sys.stdout, elem._doc._c_doc, elem._c_node)
-def tostring(_NodeBase element, encoding='us-ascii'):
- cdef _Document doc
- cdef tree.xmlOutputBuffer* c_buffer
- cdef tree.xmlCharEncodingHandler* enchandler
- cdef char* enc
-
- assert element is not None
+def tostring(element_or_tree, encoding='us-ascii'):
+ "Serialize an element to an encoded string representation of its XML tree."
+ assert element_or_tree is not None
# better, but not ET compatible : "_NodeBase element not None"
-
- #if encoding is None:
- # encoding = 'UTF-8'
- if encoding in ('utf8', 'UTF8', 'utf-8'):
- encoding = 'UTF-8'
- doc = element._doc
- enc = encoding
- # it is necessary to *and* find the encoding handler *and* use
- # encoding during output
- enchandler = tree.xmlFindCharEncodingHandler(enc)
- c_buffer = tree.xmlAllocOutputBuffer(enchandler)
- try:
- tree.xmlNodeDumpOutput(c_buffer, doc._c_doc, element._c_node, 0, 0, enc)
- _dumpNextNode(c_buffer, doc._c_doc, element._c_node, enc)
- tree.xmlOutputBufferFlush(c_buffer)
- if c_buffer.conv is not NULL:
- result = tree.xmlBufferContent(c_buffer.conv)
- else:
- result = tree.xmlBufferContent(c_buffer.buffer)
- finally:
- tree.xmlOutputBufferClose(c_buffer)
- return result
-
-def tounicode(_NodeBase element):
- cdef _Document doc
- cdef tree.xmlOutputBuffer* c_buffer
- cdef tree.xmlBuffer* c_result_buffer
-
- assert element is not None
- # better, but not ET compatible : "_NodeBase element not None"
-
- doc = element._doc
- c_buffer = tree.xmlAllocOutputBuffer(NULL)
- try:
- tree.xmlNodeDumpOutput(c_buffer, doc._c_doc, element._c_node, 0, 0, NULL)
- _dumpNextNode(c_buffer, doc._c_doc, element._c_node, NULL)
- tree.xmlOutputBufferFlush(c_buffer)
- if c_buffer.conv is not NULL:
- c_result_buffer = c_buffer.conv
- else:
- c_result_buffer = c_buffer.buffer
- result = python.PyUnicode_DecodeUTF8(
- tree.xmlBufferContent(c_result_buffer),
- tree.xmlBufferLength(c_result_buffer),
- 'strict')
- finally:
- tree.xmlOutputBufferClose(c_buffer)
- return result
+ if isinstance(element_or_tree, _NodeBase):
+ return _tostring(<_NodeBase>element_or_tree, encoding)
+ elif isinstance(element_or_tree, _ElementTree):
+ return _tostring((<_ElementTree>element_or_tree)._context_node, encoding)
+ else:
+ raise TypeError, "Type '%s' cannot be serialized." % type(element_or_tree)
def parse(source, parser=None):
"""Return an ElementTree object loaded with source elements. If no parser
Modified: lxml/trunk/src/lxml/tests/test_etree.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_etree.py (original)
+++ lxml/trunk/src/lxml/tests/test_etree.py Wed May 10 11:30:06 2006
@@ -425,54 +425,6 @@
self.assertEquals(docinfo.root_name, 'html')
self.assertEquals(docinfo.doctype, '')
- def test_tounicode(self):
- tounicode = self.etree.tounicode
- Element = self.etree.Element
- SubElement = self.etree.SubElement
-
- a = Element('a')
- b = SubElement(a, 'b')
- c = SubElement(a, 'c')
-
- self.assert_(isinstance(tounicode(a), unicode))
- self.assertEquals('<a><b></b><c></c></a>',
- canonicalize(tounicode(a)))
-
- def test_tounicode_element(self):
- tounicode = self.etree.tounicode
- Element = self.etree.Element
- SubElement = self.etree.SubElement
-
- a = Element('a')
- b = SubElement(a, 'b')
- c = SubElement(a, 'c')
- d = SubElement(c, 'd')
- self.assert_(isinstance(tounicode(b), unicode))
- self.assert_(isinstance(tounicode(c), unicode))
- self.assertEquals('<b></b>',
- canonicalize(tounicode(b)))
- self.assertEquals('<c><d></d></c>',
- canonicalize(tounicode(c)))
-
- def test_tounicode_none(self):
- tounicode = self.etree.tounicode
- self.assertRaises(AssertionError, self.etree.tounicode, None)
-
- def test_tounicode_element_tail(self):
- tounicode = self.etree.tounicode
- Element = self.etree.Element
- SubElement = self.etree.SubElement
-
- a = Element('a')
- b = SubElement(a, 'b')
- c = SubElement(a, 'c')
- d = SubElement(c, 'd')
- b.tail = 'Foo'
-
- self.assert_(isinstance(tounicode(b), unicode))
- self.assert_(tounicode(b) == '<b/>Foo' or
- tounicode(b) == '<b />Foo')
-
def test_unicode(self):
Element = self.etree.Element
SubElement = self.etree.SubElement
From scoder at codespeak.net Wed May 10 11:37:48 2006
From: scoder at codespeak.net (scoder@codespeak.net)
Date: Wed May 10 11:37:50 2006
Subject: [Lxml-checkins] r27035 - lxml/trunk/src/lxml
Message-ID: <20060510093748.0E98210093@code0.codespeak.net>
Author: scoder
Date: Wed May 10 11:37:46 2006
New Revision: 27035
Modified:
lxml/trunk/src/lxml/etree.pyx
Log:
doc fixes
Modified: lxml/trunk/src/lxml/etree.pyx
==============================================================================
--- lxml/trunk/src/lxml/etree.pyx (original)
+++ lxml/trunk/src/lxml/etree.pyx Wed May 10 11:37:46 2006
@@ -1398,8 +1398,7 @@
def tostring(element_or_tree, encoding='us-ascii'):
"Serialize an element to an encoded string representation of its XML tree."
- assert element_or_tree is not None
- # better, but not ET compatible : "_NodeBase element not None"
+ assert element_or_tree is not None # for ElementTree compatibility only
if isinstance(element_or_tree, _NodeBase):
return _tostring(<_NodeBase>element_or_tree, encoding)
elif isinstance(element_or_tree, _ElementTree):
From scoder at codespeak.net Wed May 10 11:50:14 2006
From: scoder at codespeak.net (scoder@codespeak.net)
Date: Wed May 10 11:50:15 2006
Subject: [Lxml-checkins] r27036 - in lxml/branch/lxml-0.9.x: . src/lxml
src/lxml/tests
Message-ID: <20060510095014.A1D891009D@code0.codespeak.net>
Author: scoder
Date: Wed May 10 11:50:13 2006
New Revision: 27036
Modified:
lxml/branch/lxml-0.9.x/CHANGES.txt
lxml/branch/lxml-0.9.x/src/lxml/etree.pyx
lxml/branch/lxml-0.9.x/src/lxml/tests/test_xslt.py
lxml/branch/lxml-0.9.x/src/lxml/xslt.pxi
Log:
merged in bug fixes from trunk: C14N memory leak, str() on encoded XSLT results
Modified: lxml/branch/lxml-0.9.x/CHANGES.txt
==============================================================================
--- lxml/branch/lxml-0.9.x/CHANGES.txt (original)
+++ lxml/branch/lxml-0.9.x/CHANGES.txt Wed May 10 11:50:13 2006
@@ -19,6 +19,10 @@
Bugs fixed
----------
+* str(xslt_result) was broken for output other than UTF-8
+
+* Memory leak if write_c14n fails to write the file after conversion
+
* Crash in XMLSchema and RelaxNG when passing non-schema documents
* Memory leak in RelaxNG() when RelaxNGParseError is raised
Modified: lxml/branch/lxml-0.9.x/src/lxml/etree.pyx
==============================================================================
--- lxml/branch/lxml-0.9.x/src/lxml/etree.pyx (original)
+++ lxml/branch/lxml-0.9.x/src/lxml/etree.pyx Wed May 10 11:50:13 2006
@@ -348,10 +348,12 @@
if bytes < 0:
raise C14NError, "C14N failed"
- if not hasattr(file, 'write'):
- file = open(file, 'wb')
- file.write(data)
- tree.xmlFree(data)
+ try:
+ if not hasattr(file, 'write'):
+ file = open(file, 'wb')
+ file.write(data)
+ finally:
+ tree.xmlFree(data)
cdef _ElementTree _elementTreeFactory(_Document doc,
_NodeBase context_node):
Modified: lxml/branch/lxml-0.9.x/src/lxml/tests/test_xslt.py
==============================================================================
--- lxml/branch/lxml-0.9.x/src/lxml/tests/test_xslt.py (original)
+++ lxml/branch/lxml-0.9.x/src/lxml/tests/test_xslt.py Wed May 10 11:50:13 2006
@@ -29,6 +29,47 @@
B
''',
st.tostring(res))
+
+ def test_xslt_utf8(self):
+ tree = self.parse(u'<a><b>\uF8D2</b><c>\uF8D2</c></a>')
+ style = self.parse('''\
+
+
+
+
+
+ ''')
+
+ st = etree.XSLT(style)
+ res = st.apply(tree)
+ expected = u'''\
+
+\uF8D2
+'''
+ self.assertEquals(expected,
+ unicode(str(res), 'UTF-8'))
+
+ def test_xslt_encoding(self):
+ tree = self.parse(u'<a><b>\uF8D2</b><c>\uF8D2</c></a>')
+ style = self.parse('''\
+
+
+
+
+
+ ''')
+
+ st = etree.XSLT(style)
+ res = st.apply(tree)
+ expected = u'''\
+
+\uF8D2
+'''
+ self.assertEquals(expected,
+ unicode(str(res), 'UTF-16'))
+
def test_xslt_input(self):
tree = self.parse('<a><b>B</b><c>C</c></a>')
style = self.parse('''\
Modified: lxml/branch/lxml-0.9.x/src/lxml/xslt.pxi
==============================================================================
--- lxml/branch/lxml-0.9.x/src/lxml/xslt.pxi (original)
+++ lxml/branch/lxml-0.9.x/src/lxml/xslt.pxi Wed May 10 11:50:13 2006
@@ -361,7 +361,8 @@
raise XSLTSaveError, "Error saving XSLT result to string"
if s is NULL:
return ''
- result = funicode(s)
+ # we must not use 'funicode' here as this is not always UTF-8
+ result = python.PyString_FromStringAndSize(s, l)
tree.xmlFree(s)
return result
From scoder at codespeak.net Wed May 10 11:54:59 2006
From: scoder at codespeak.net (scoder@codespeak.net)
Date: Wed May 10 11:55:01 2006
Subject: [Lxml-checkins] r27037 - lxml/branch/lxml-0.9.x
Message-ID: <20060510095459.EF8C4100A2@code0.codespeak.net>
Author: scoder
Date: Wed May 10 11:54:58 2006
New Revision: 27037
Modified:
lxml/branch/lxml-0.9.x/CHANGES.txt
lxml/branch/lxml-0.9.x/version.txt
Log:
0.9.2
Modified: lxml/branch/lxml-0.9.x/CHANGES.txt
==============================================================================
--- lxml/branch/lxml-0.9.x/CHANGES.txt (original)
+++ lxml/branch/lxml-0.9.x/CHANGES.txt Wed May 10 11:54:58 2006
@@ -1,8 +1,8 @@
lxml changelog
==============
-current
-=======
+0.9.2 (2006-05-10)
+==================
Features added
--------------
@@ -19,7 +19,7 @@
Bugs fixed
----------
-* str(xslt_result) was broken for output other than UTF-8
+* str(xslt_result) was broken for XSLT output other than UTF-8
* Memory leak if write_c14n fails to write the file after conversion
@@ -72,7 +72,7 @@
* XPath class for compiled XPath expressions
-* XMLID module level function
+* XMLID module level function (ElementTree compatible)
* XMLParser API for customized libxml2 parser configuration
Modified: lxml/branch/lxml-0.9.x/version.txt
==============================================================================
--- lxml/branch/lxml-0.9.x/version.txt (original)
+++ lxml/branch/lxml-0.9.x/version.txt Wed May 10 11:54:58 2006
@@ -1 +1 @@
-0.9.1
+0.9.2
From scoder at codespeak.net Wed May 10 12:25:45 2006
From: scoder at codespeak.net (scoder@codespeak.net)
Date: Wed May 10 12:25:46 2006
Subject: [Lxml-checkins] r27038 - lxml/trunk
Message-ID: <20060510102545.39F8D100AD@code0.codespeak.net>
Author: scoder
Date: Wed May 10 12:25:43 2006
New Revision: 27038
Modified:
lxml/trunk/CHANGES.txt
lxml/trunk/version.txt
Log:
cleanup and version bump after releasing 0.9.2
Modified: lxml/trunk/CHANGES.txt
==============================================================================
--- lxml/trunk/CHANGES.txt (original)
+++ lxml/trunk/CHANGES.txt Wed May 10 12:25:43 2006
@@ -46,6 +46,20 @@
* XMLDTDID function parses XML into tuple (root node, ID dict) based on xml:id
implementation of libxml2 (as opposed to ET compatible XMLID)
+Bugs fixed
+----------
+
+* ElementTree.xpath() and XPathDocumentEvaluator were not using the
+ ElementTree root node as reference point
+
+* Calling document('') in XSLT failed to return the stylesheet
+
+0.9.2 (2006-05-10)
+==================
+
+Features added
+--------------
+
* Speedup for Element.makeelement(): the new element now reuses the original
libxml2 document instead of creating a new empty one
@@ -62,11 +76,6 @@
* Memory leak if write_c14n fails to write the file after conversion
-* ElementTree.xpath() and XPathDocumentEvaluator were not using the
- ElementTree root node as reference point
-
-* Calling document('') in XSLT failed to return the stylesheet
-
* Crash in XMLSchema and RelaxNG when passing non-schema documents
* Memory leak in RelaxNG() when RelaxNGParseError is raised
Modified: lxml/trunk/version.txt
==============================================================================
--- lxml/trunk/version.txt (original)
+++ lxml/trunk/version.txt Wed May 10 12:25:43 2006
@@ -1 +1 @@
-0.9.1
+0.9.2
From scoder at codespeak.net Wed May 10 13:03:02 2006
From: scoder at codespeak.net (scoder@codespeak.net)
Date: Wed May 10 13:03:03 2006
Subject: [Lxml-checkins] r27043 - in lxml/trunk: . doc src/lxml
src/lxml/tests
Message-ID: <20060510110302.93A6310092@code0.codespeak.net>
Author: scoder
Date: Wed May 10 13:03:00 2006
New Revision: 27043
Modified:
lxml/trunk/CHANGES.txt
lxml/trunk/doc/api.txt
lxml/trunk/src/lxml/etree.pyx
lxml/trunk/src/lxml/tests/test_etree.py
Log:
reverted 'unicode()' calls on Element/ElementTree to 'tounicode()' module function
Modified: lxml/trunk/CHANGES.txt
==============================================================================
--- lxml/trunk/CHANGES.txt (original)
+++ lxml/trunk/CHANGES.txt Wed May 10 13:03:00 2006
@@ -8,7 +8,7 @@
--------------
* Support for writing the XML representation of Elements and ElementTrees to
- Python unicode strings via ``unicode()``
+ Python unicode strings via ``etree.tounicode()``
* Support for writing XSLT results to Python unicode strings via ``unicode()``
Modified: lxml/trunk/doc/api.txt
==============================================================================
--- lxml/trunk/doc/api.txt (original)
+++ lxml/trunk/doc/api.txt Wed May 10 13:03:00 2006
@@ -153,30 +153,24 @@
>>> etree.tostring(root, 'UTF-8')
'<test> \xef\xa3\x91 + \xef\xa3\x92 </test>'
-As an extension, lxml.etree supports calling the builtin ``unicode()``
-function on XML tree objects to retrieve a Python unicode representation::
+As an extension, lxml.etree has a new ``lxml.etree.tounicode()`` function that
+you can call on XML tree objects to retrieve a Python unicode representation::
- >>> unicode(root)
+ >>> etree.tounicode(root)
u'<test> \uf8d1 + \uf8d2 </test>'
>>> el = etree.Element("test")
- >>> unicode(el)
+ >>> etree.tounicode(el)
u'<test/>'
>>> subel = etree.SubElement(el, "subtest")
- >>> unicode(el)
+ >>> etree.tounicode(el)
u'<test><subtest/></test>'
>>> et = etree.ElementTree(el)
- >>> unicode(et)
+ >>> etree.tounicode(et)
u'<test><subtest/></test>'
-Note, however, that the ``str()`` function behaves as in the ElementTree
-library and returns something like ````. This
-is due to the fact that this function implies no clear encoding semantics.
-The ``unicode()`` function, on the other hand, is specified to always returns
-a Python unicode string.
-
xpath method on ElementTree, Element
------------------------------------
Modified: lxml/trunk/src/lxml/etree.pyx
==============================================================================
--- lxml/trunk/src/lxml/etree.pyx (original)
+++ lxml/trunk/src/lxml/etree.pyx Wed May 10 13:03:00 2006
@@ -319,9 +319,6 @@
unregisterProxy(self)
attemptDeallocation(self._c_node)
- def __unicode__(self):
- return _tounicode(self)
-
def _init(self):
"""Called after object initialisation. Subclasses may override
this if they recursively call _init() in the superclasses.
@@ -390,9 +387,6 @@
return root.findall(path)
# extensions to ElementTree API
- def __unicode__(self):
- return _tounicode(self._context_node)
-
def xpath(self, _path, namespaces=None, **_variables):
"""XPath evaluate in context of document.
@@ -1406,6 +1400,16 @@
else:
raise TypeError, "Type '%s' cannot be serialized." % type(element_or_tree)
+def tounicode(element_or_tree):
+ "Serialize an element to the Python unicode representation of its XML tree."
+ assert element_or_tree is not None # for ElementTree compatibility only
+ if isinstance(element_or_tree, _NodeBase):
+ return _tounicode(<_NodeBase>element_or_tree)
+ elif isinstance(element_or_tree, _ElementTree):
+ return _tounicode((<_ElementTree>element_or_tree)._context_node)
+ else:
+ raise TypeError, "Type '%s' cannot be serialized." % type(element_or_tree)
+
def parse(source, parser=None):
"""Return an ElementTree object loaded with source elements. If no parser
is provided as second argument, the default parser is used.
Modified: lxml/trunk/src/lxml/tests/test_etree.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_etree.py (original)
+++ lxml/trunk/src/lxml/tests/test_etree.py Wed May 10 13:03:00 2006
@@ -425,7 +425,8 @@
self.assertEquals(docinfo.root_name, 'html')
self.assertEquals(docinfo.doctype, '')
- def test_unicode(self):
+ def test_tounicode(self):
+ tounicode = self.etree.tounicode
Element = self.etree.Element
SubElement = self.etree.SubElement
@@ -433,11 +434,12 @@
b = SubElement(a, 'b')
c = SubElement(a, 'c')
- self.assert_(isinstance(unicode(a), unicode))
+ self.assert_(isinstance(tounicode(a), unicode))
self.assertEquals('<a><b></b><c></c></a>',
- canonicalize(unicode(a)))
+ canonicalize(tounicode(a)))
- def test_unicode_element(self):
+ def test_tounicode_element(self):
+ tounicode = self.etree.tounicode
Element = self.etree.Element
SubElement = self.etree.SubElement
@@ -445,34 +447,19 @@
b = SubElement(a, 'b')
c = SubElement(a, 'c')
d = SubElement(c, 'd')
- self.assert_(isinstance(unicode(b), unicode))
- self.assert_(isinstance(unicode(c), unicode))
+ self.assert_(isinstance(tounicode(b), unicode))
+ self.assert_(isinstance(tounicode(c), unicode))
self.assertEquals('<b></b>',
- canonicalize(unicode(b)))
+ canonicalize(tounicode(b)))
self.assertEquals('<c><d></d></c>',
- canonicalize(unicode(c)))
+ canonicalize(tounicode(c)))
- def test_unicode_elementtree(self):
- ElementTree = self.etree.ElementTree
- Element = self.etree.Element
- SubElement = self.etree.SubElement
-
- a = Element('a')
- b = SubElement(a, 'b')
- c = SubElement(a, 'c')
- d = SubElement(c, 'd')
-
- t = ElementTree(b)
- self.assert_(isinstance(unicode(t), unicode))
- self.assertEquals('<b></b>',
- canonicalize(unicode(t)))
-
- t = ElementTree(c)
- self.assert_(isinstance(unicode(t), unicode))
- self.assertEquals('<c><d></d></c>',
- canonicalize(unicode(t)))
+ def test_tounicode_none(self):
+ tounicode = self.etree.tounicode
+ self.assertRaises(AssertionError, self.etree.tounicode, None)
def test_tounicode_element_tail(self):
+ tounicode = self.etree.tounicode
Element = self.etree.Element
SubElement = self.etree.SubElement
@@ -482,9 +469,9 @@
d = SubElement(c, 'd')
b.tail = 'Foo'
- self.assert_(isinstance(unicode(b), unicode))
- self.assert_(unicode(b) == '<b/>Foo' or
- unicode(b) == '<b />Foo')
+ self.assert_(isinstance(tounicode(b), unicode))
+ self.assert_(tounicode(b) == '<b/>Foo' or
+ tounicode(b) == '<b />Foo')
def _writeElement(self, element, encoding='us-ascii'):
"""Write out element for comparison.
From scoder at codespeak.net Wed May 10 13:13:27 2006
From: scoder at codespeak.net (scoder@codespeak.net)
Date: Wed May 10 13:13:29 2006
Subject: [Lxml-checkins] r27044 - lxml/trunk/doc
Message-ID: <20060510111327.9A3A910092@code0.codespeak.net>
Author: scoder
Date: Wed May 10 13:13:26 2006
New Revision: 27044
Modified:
lxml/trunk/doc/api.txt
Log:
doc cleanup in api.txt->XSLT
Modified: lxml/trunk/doc/api.txt
==============================================================================
--- lxml/trunk/doc/api.txt (original)
+++ lxml/trunk/doc/api.txt Wed May 10 13:13:26 2006
@@ -272,7 +272,7 @@
However, encodings that are not supported by Python will result in an error::
- >>> f = StringIO('''\
+ >>> xslt_tree = etree.XML('''\
...
...
@@ -280,8 +280,7 @@
...
...
... ''')
- >>> xslt_doc = etree.parse(f)
- >>> transform = etree.XSLT(xslt_doc)
+ >>> transform = etree.XSLT(xslt_tree)
>>> result = transform(doc)
>>> unicode(result)
@@ -292,16 +291,14 @@
It is possible to pass parameters, in the form of XPath expressions, to the
XSLT template::
- >>> f = StringIO('''\
+ >>> xslt_tree = etree.XML('''\
...
- ...
...
...
...
... ''')
- >>> xslt_doc = etree.parse(f)
- >>> transform = etree.XSLT(xslt_doc)
+ >>> transform = etree.XSLT(xslt_tree)
>>> f = StringIO('<a><b>Text</b></a>')
>>> doc = etree.parse(f)
@@ -323,7 +320,7 @@
transformation to multiple documents, but is shorter to write for one-shot
operations, as you do not have to instantiate a stylesheet yourself::
- >>> result = doc.xslt(xslt_doc, a="'A'")
+ >>> result = doc.xslt(xslt_tree, a="'A'")
>>> str(result)
'<?xml version="1.0"?>\n<foo>A</foo>\n'
From scoder at codespeak.net Wed May 10 21:47:00 2006
From: scoder at codespeak.net (scoder@codespeak.net)
Date: Wed May 10 21:47:01 2006
Subject: [Lxml-checkins] r27056 - lxml/trunk/src/lxml
Message-ID: <20060510194700.AE07110089@code0.codespeak.net>
Author: scoder
Date: Wed May 10 21:46:59 2006
New Revision: 27056
Modified:
lxml/trunk/src/lxml/etree.pyx
Log:
doc: state that the result of tounicode() does not carry an encoding declaration which might be a problem
Modified: lxml/trunk/src/lxml/etree.pyx
==============================================================================
--- lxml/trunk/src/lxml/etree.pyx (original)
+++ lxml/trunk/src/lxml/etree.pyx Wed May 10 21:46:59 2006
@@ -1401,7 +1401,12 @@
raise TypeError, "Type '%s' cannot be serialized." % type(element_or_tree)
def tounicode(element_or_tree):
- "Serialize an element to the Python unicode representation of its XML tree."
+ """Serialize an element to the Python unicode representation of its XML
+ tree.
+
+ Note that the result does not carry an XML encoding declaration and is
+ therefore not necessarily suited for serialization without further
+ treatment."""
assert element_or_tree is not None # for ElementTree compatibility only
if isinstance(element_or_tree, _NodeBase):
return _tounicode(<_NodeBase>element_or_tree)
From scoder at codespeak.net Wed May 10 21:47:56 2006
From: scoder at codespeak.net (scoder@codespeak.net)
Date: Wed May 10 21:47:58 2006
Subject: [Lxml-checkins] r27057 - in lxml/trunk: . src/lxml src/lxml/tests
Message-ID: <20060510194756.5846210088@code0.codespeak.net>
Author: scoder
Date: Wed May 10 21:47:54 2006
New Revision: 27057
Modified:
lxml/trunk/CHANGES.txt
lxml/trunk/src/lxml/apihelpers.pxi
lxml/trunk/src/lxml/tests/test_elementtree.py
Log:
fix: tostring() failed to serialize encodings that contain 0-bytes
Modified: lxml/trunk/CHANGES.txt
==============================================================================
--- lxml/trunk/CHANGES.txt (original)
+++ lxml/trunk/CHANGES.txt Wed May 10 21:47:54 2006
@@ -49,10 +49,12 @@
Bugs fixed
----------
+* ``tostring()`` failed to serialize encodings that contain 0-bytes
+
* ElementTree.xpath() and XPathDocumentEvaluator were not using the
ElementTree root node as reference point
-* Calling document('') in XSLT failed to return the stylesheet
+* Calling ``document('')`` in XSLT failed to return the stylesheet
0.9.2 (2006-05-10)
==================
Modified: lxml/trunk/src/lxml/apihelpers.pxi
==============================================================================
--- lxml/trunk/src/lxml/apihelpers.pxi (original)
+++ lxml/trunk/src/lxml/apihelpers.pxi Wed May 10 21:47:54 2006
@@ -4,6 +4,7 @@
"Serialize an element to an encoded string representation of its XML tree."
cdef _Document doc
cdef tree.xmlOutputBuffer* c_buffer
+ cdef tree.xmlBuffer* c_result_buffer
cdef tree.xmlCharEncodingHandler* enchandler
cdef char* enc
if element is None:
@@ -22,10 +23,13 @@
tree.xmlNodeDumpOutput(c_buffer, doc._c_doc, element._c_node, 0, 0, enc)
_dumpNextNode(c_buffer, doc._c_doc, element._c_node, enc)
tree.xmlOutputBufferFlush(c_buffer)
- if c_buffer.conv is not NULL:
- result = tree.xmlBufferContent(c_buffer.conv)
+ if c_buffer.conv is not NULL:
+ c_result_buffer = c_buffer.conv
else:
- result = tree.xmlBufferContent(c_buffer.buffer)
+ c_result_buffer = c_buffer.buffer
+ result = python.PyString_FromStringAndSize(
+ tree.xmlBufferContent(c_result_buffer),
+ tree.xmlBufferLength(c_result_buffer))
finally:
tree.xmlOutputBufferClose(c_buffer)
return result
Modified: lxml/trunk/src/lxml/tests/test_elementtree.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_elementtree.py (original)
+++ lxml/trunk/src/lxml/tests/test_elementtree.py Wed May 10 21:47:54 2006
@@ -1546,6 +1546,19 @@
self.assertEquals('<a><b></b><c></c></a>',
canonicalize(tostring(a)))
+
+ def test_tostring_encoding(self):
+ tostring = self.etree.tostring
+ Element = self.etree.Element
+ SubElement = self.etree.SubElement
+
+ a = Element('a')
+ b = SubElement(a, 'b')
+ c = SubElement(a, 'c')
+
+ result = unicode(tostring(a, 'UTF-16'), 'UTF-16')
+ self.assertEquals('<a><b></b><c></c></a>',
+ canonicalize(result))
def test_tostring_element(self):
tostring = self.etree.tostring
From scoder at codespeak.net Wed May 10 22:29:40 2006
From: scoder at codespeak.net (scoder@codespeak.net)
Date: Wed May 10 22:29:42 2006
Subject: [Lxml-checkins] r27058 - lxml/trunk/src/lxml/tests
Message-ID: <20060510202940.042ED1007E@code0.codespeak.net>
Author: scoder
Date: Wed May 10 22:29:40 2006
New Revision: 27058
Modified:
lxml/trunk/src/lxml/tests/test_elementtree.py
Log:
moved test case to similar test cases
Modified: lxml/trunk/src/lxml/tests/test_elementtree.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_elementtree.py (original)
+++ lxml/trunk/src/lxml/tests/test_elementtree.py Wed May 10 22:29:40 2006
@@ -1546,19 +1546,6 @@
self.assertEquals('<a><b></b><c></c></a>',
canonicalize(tostring(a)))
-
- def test_tostring_encoding(self):
- tostring = self.etree.tostring
- Element = self.etree.Element
- SubElement = self.etree.SubElement
-
- a = Element('a')
- b = SubElement(a, 'b')
- c = SubElement(a, 'c')
-
- result = unicode(tostring(a, 'UTF-16'), 'UTF-16')
- self.assertEquals('<a><b></b><c></c></a>',
- canonicalize(result))
def test_tostring_element(self):
tostring = self.etree.tostring
@@ -1698,7 +1685,20 @@
a = Element('a')
a.text = u'Søk på nettet'
self.assert_(tostring(a, 'UTF-8') in [xml, prologue + xml])
+
+ def test_encoding_tostring_utf16(self):
+ tostring = self.etree.tostring
+ Element = self.etree.Element
+ SubElement = self.etree.SubElement
+ a = Element('a')
+ b = SubElement(a, 'b')
+ c = SubElement(a, 'c')
+
+ result = unicode(tostring(a, 'UTF-16'), 'UTF-16')
+ self.assertEquals('<a><b></b><c></c></a>',
+ canonicalize(result))
+
def test_encoding_tostring_sub(self):
Element = self.etree.Element
SubElement = self.etree.SubElement
From scoder at codespeak.net Wed May 10 22:35:47 2006
From: scoder at codespeak.net (scoder@codespeak.net)
Date: Wed May 10 22:35:50 2006
Subject: [Lxml-checkins] r27059 - in lxml/trunk: . doc src/lxml
Message-ID: <20060510203547.2324F1007E@code0.codespeak.net>
Author: scoder
Date: Wed May 10 22:35:44 2006
New Revision: 27059
Modified:
lxml/trunk/CHANGES.txt
lxml/trunk/doc/api.txt
lxml/trunk/src/lxml/apihelpers.pxi
lxml/trunk/src/lxml/etree.pyx
Log:
fix tostring() to raise exception on buffer alloc errors; support writing XML declaration in tostring(), can be suppressed by xml_declaration keyword
Modified: lxml/trunk/CHANGES.txt
==============================================================================
--- lxml/trunk/CHANGES.txt (original)
+++ lxml/trunk/CHANGES.txt Wed May 10 22:35:44 2006
@@ -49,6 +49,8 @@
Bugs fixed
----------
+* ``tostring()`` now adds an XML declaration for non-ASCII encodings
+
* ``tostring()`` failed to serialize encodings that contain 0-bytes
* ElementTree.xpath() and XPathDocumentEvaluator were not using the
Modified: lxml/trunk/doc/api.txt
==============================================================================
--- lxml/trunk/doc/api.txt (original)
+++ lxml/trunk/doc/api.txt Wed May 10 22:35:44 2006
@@ -150,7 +150,7 @@
>>> etree.tostring(root)
' + '
- >>> etree.tostring(root, 'UTF-8')
+ >>> etree.tostring(root, 'UTF-8', xml_declaration=False)
' \xef\xa3\x91 + \xef\xa3\x92 '
As an extension, lxml.etree has a new ``lxml.etree.tounicode()`` function that
Modified: lxml/trunk/src/lxml/apihelpers.pxi
==============================================================================
--- lxml/trunk/src/lxml/apihelpers.pxi (original)
+++ lxml/trunk/src/lxml/apihelpers.pxi Wed May 10 22:35:44 2006
@@ -1,6 +1,6 @@
# Private helper functions
-cdef _tostring(_NodeBase element, encoding):
+cdef _tostring(_NodeBase element, encoding, int xml_declaration):
"Serialize an element to an encoded string representation of its XML tree."
cdef _Document doc
cdef tree.xmlOutputBuffer* c_buffer
@@ -9,8 +9,6 @@
cdef char* enc
if element is None:
return None
- #if encoding is None:
- # encoding = 'UTF-8'
if encoding in ('utf8', 'UTF8', 'utf-8'):
encoding = 'UTF-8'
doc = element._doc
@@ -19,6 +17,22 @@
# encoding during output
enchandler = tree.xmlFindCharEncodingHandler(enc)
c_buffer = tree.xmlAllocOutputBuffer(enchandler)
+ if c_buffer is NULL:
+ raise LxmlError, "Failed to create output buffer"
+
+ if xml_declaration:
+ if doc._c_doc.version is NULL:
+ version = "1.0"
+ else:
+ version = doc._c_doc.version
+ xml_decl = "" % (
+ version, encoding)
+ tree.xmlOutputBufferWriteString(c_buffer, "\n")
+
try:
tree.xmlNodeDumpOutput(c_buffer, doc._c_doc, element._c_node, 0, 0, enc)
_dumpNextNode(c_buffer, doc._c_doc, element._c_node, enc)
@@ -43,6 +57,8 @@
return None
doc = element._doc
c_buffer = tree.xmlAllocOutputBuffer(NULL)
+ if c_buffer is NULL:
+ raise LxmlError, "Failed to create output buffer"
try:
tree.xmlNodeDumpOutput(c_buffer, doc._c_doc, element._c_node, 0, 0, NULL)
_dumpNextNode(c_buffer, doc._c_doc, element._c_node, NULL)
Modified: lxml/trunk/src/lxml/etree.pyx
==============================================================================
--- lxml/trunk/src/lxml/etree.pyx (original)
+++ lxml/trunk/src/lxml/etree.pyx Wed May 10 22:35:44 2006
@@ -1390,13 +1390,24 @@
# better, but not ET compatible : "_NodeBase elem not None"
_dumpToFile(sys.stdout, elem._doc._c_doc, elem._c_node)
-def tostring(element_or_tree, encoding='us-ascii'):
+def tostring(element_or_tree, encoding='us-ascii', xml_declaration=None):
"Serialize an element to an encoded string representation of its XML tree."
+ cdef int write_declaration
assert element_or_tree is not None # for ElementTree compatibility only
+
+ encoding = str(encoding)
+ if xml_declaration is None:
+ # by default, write an XML declaration only for non-standard encodings
+ write_declaration = (encoding != 'us-ascii')
+ else:
+ write_declaration = bool(xml_declaration)
+
if isinstance(element_or_tree, _NodeBase):
- return _tostring(<_NodeBase>element_or_tree, encoding)
+ return _tostring(<_NodeBase>element_or_tree,
+ encoding, write_declaration)
elif isinstance(element_or_tree, _ElementTree):
- return _tostring((<_ElementTree>element_or_tree)._context_node, encoding)
+ return _tostring((<_ElementTree>element_or_tree)._context_node,
+ encoding, write_declaration)
else:
raise TypeError, "Type '%s' cannot be serialized." % type(element_or_tree)
From scoder at codespeak.net Thu May 11 08:15:33 2006
From: scoder at codespeak.net (scoder@codespeak.net)
Date: Thu May 11 08:15:35 2006
Subject: [Lxml-checkins] r27061 - lxml/trunk/doc
Message-ID: <20060511061533.8F2A410089@code0.codespeak.net>
Author: scoder
Date: Thu May 11 08:15:31 2006
New Revision: 27061
Modified:
lxml/trunk/doc/api.txt
Log:
api.txt: note on missing XML declarations in result of tounicode(), compare to tostring()
Modified: lxml/trunk/doc/api.txt
==============================================================================
--- lxml/trunk/doc/api.txt (original)
+++ lxml/trunk/doc/api.txt Thu May 11 08:15:31 2006
@@ -138,7 +138,7 @@
encoding themselves and thus lie about their real encoding::
>>> try:
- ... broken = etree.XML(u'' + uxml)
+ ... broken = etree.XML(u'\n' + uxml)
... except etree.XMLSyntaxError:
... print "This is not well-formed XML!"
This is not well-formed XML!
@@ -171,6 +171,13 @@
>>> etree.tounicode(et)
u' '
+Note that the unicode string returned by ``tounicode()`` never has an XML
+declaration. This means, it does not specify an encoding nor an XML version.
+This makes it possible to pass the unicode string back into the lxml parsers.
+However, you may have to add a declaration yourself if you want to serialize
+the unicode string to a byte stream later. In contrast, the ``tostring()``
+function automatically adds a declaration as needed.
+
xpath method on ElementTree, Element
------------------------------------
From scoder at codespeak.net Thu May 11 08:37:35 2006
From: scoder at codespeak.net (scoder@codespeak.net)
Date: Thu May 11 08:37:37 2006
Subject: [Lxml-checkins] r27062 - lxml/trunk/doc
Message-ID: <20060511063735.E8BF010088@code0.codespeak.net>
Author: scoder
Date: Thu May 11 08:37:34 2006
New Revision: 27062
Modified:
lxml/trunk/doc/api.txt
Log:
clarification on tostring/tounicode
Modified: lxml/trunk/doc/api.txt
==============================================================================
--- lxml/trunk/doc/api.txt (original)
+++ lxml/trunk/doc/api.txt Thu May 11 08:37:34 2006
@@ -171,12 +171,12 @@
>>> etree.tounicode(et)
u' '
-Note that the unicode string returned by ``tounicode()`` never has an XML
-declaration. This means, it does not specify an encoding nor an XML version.
-This makes it possible to pass the unicode string back into the lxml parsers.
-However, you may have to add a declaration yourself if you want to serialize
-the unicode string to a byte stream later. In contrast, the ``tostring()``
-function automatically adds a declaration as needed.
+Note that the unicode strings returned by ``tounicode()`` never have an XML
+declaration and therefore do not specify an encoding. This makes it possible
+to pass them back into the lxml parsers. However, you may have to add a
+declaration yourself if you want to serialize such a unicode string to a byte
+stream later. In contrast, the ``tostring()`` function automatically adds a
+declaration as needed that reflects the encoding of the returned byte string.
xpath method on ElementTree, Element
From scoder at codespeak.net Thu May 11 08:50:06 2006
From: scoder at codespeak.net (scoder@codespeak.net)
Date: Thu May 11 08:50:07 2006
Subject: [Lxml-checkins] r27063 - lxml/trunk/doc
Message-ID: <20060511065006.3B68010088@code0.codespeak.net>
Author: scoder
Date: Thu May 11 08:50:04 2006
New Revision: 27063
Modified:
lxml/trunk/doc/api.txt
Log:
api.txt: small cleanup in XSLT docs
Modified: lxml/trunk/doc/api.txt
==============================================================================
--- lxml/trunk/doc/api.txt (original)
+++ lxml/trunk/doc/api.txt Thu May 11 08:50:04 2006
@@ -269,10 +269,11 @@
>>> str(result)
'\nText \n'
-The result is always a plain string, encoded as requested by the `xsl:output`
-element in the stylesheet. If you want a Python unicode string instead, you
-should set this encoding to `UTF-8` (or leave it as the `ASCII` default).
-This allows you to call the `unicode()` function on the result::
+The result is always a plain string, encoded as requested by the
+``xsl:output`` element in the stylesheet. If you want a Python unicode string
+instead, you should set this encoding to ``UTF-8`` (unless the `ASCII` default
+is sufficient). This allows you to call the builtin ``unicode()`` function on
+the result::
>>> unicode(result)
u'\nText \n'
From scoder at codespeak.net Thu May 11 09:01:48 2006
From: scoder at codespeak.net (scoder@codespeak.net)
Date: Thu May 11 09:01:50 2006
Subject: [Lxml-checkins] r27064 - lxml/trunk/doc
Message-ID: <20060511070148.BF72C10088@code0.codespeak.net>
Author: scoder
Date: Thu May 11 09:01:47 2006
New Revision: 27064
Modified:
lxml/trunk/doc/api.txt
lxml/trunk/doc/main.txt
Log:
api.txt: clarification on output encoding in XSLT
Modified: lxml/trunk/doc/api.txt
==============================================================================
--- lxml/trunk/doc/api.txt (original)
+++ lxml/trunk/doc/api.txt Thu May 11 09:01:47 2006
@@ -278,7 +278,8 @@
>>> unicode(result)
u'\nText \n'
-However, encodings that are not supported by Python will result in an error::
+You can use other encodings at the cost of multiple recoding. Encodings that
+are not supported by Python will result in an error::
>>> xslt_tree = etree.XML('''\
...
Author: scoder
Date: Thu May 11 14:44:09 2006
New Revision: 27073
Modified:
lxml/trunk/src/lxml/tests/test_xpathevaluator.py
Log:
test case for handling unicode in namespaces of ETXPath expressions
Modified: lxml/trunk/src/lxml/tests/test_xpathevaluator.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_xpathevaluator.py (original)
+++ lxml/trunk/src/lxml/tests/test_xpathevaluator.py Thu May 11 14:44:09 2006
@@ -315,6 +315,19 @@
self.assertEquals(1, len(r))
self.assertEquals('{nsb}b', r[0].tag)
+ def test_xpath_compile_unicode(self):
+ x = self.parse(u' ')
+
+ expr = etree.ETXPath(u"/a/{nsa\uf8d2}b")
+ r = expr.evaluate(x)
+ self.assertEquals(1, len(r))
+ self.assertEquals(u'{nsa\uf8d2}b', r[0].tag)
+
+ expr = etree.ETXPath(u"/a/{nsb\uf8d1}b")
+ r = expr.evaluate(x)
+ self.assertEquals(1, len(r))
+ self.assertEquals(u'{nsb\uf8d1}b', r[0].tag)
+
SAMPLE_XML = etree.parse(StringIO("""
text
From scoder at codespeak.net Thu May 11 14:45:27 2006
From: scoder at codespeak.net (scoder@codespeak.net)
Date: Thu May 11 14:45:28 2006
Subject: [Lxml-checkins] r27074 - lxml/trunk/src/lxml
Message-ID: <20060511124527.6978A1008D@code0.codespeak.net>
Author: scoder
Date: Thu May 11 14:45:25 2006
New Revision: 27074
Modified:
lxml/trunk/src/lxml/xpath.pxi
Log:
some restructuring in ETXPath._nsextract_path() to fix path encoding
Modified: lxml/trunk/src/lxml/xpath.pxi
==============================================================================
--- lxml/trunk/src/lxml/xpath.pxi (original)
+++ lxml/trunk/src/lxml/xpath.pxi Thu May 11 14:45:25 2006
@@ -211,14 +211,15 @@
"""Special XPath class that supports the ElementTree {uri} notation for
namespaces."""
def __init__(self, path, extensions=None):
- path_utf, namespaces = self._nsextract_path(_utf8(path))
- XPath.__init__(self, funicode(path_utf), namespaces, extensions)
+ path, namespaces = self._nsextract_path(path)
+ XPath.__init__(self, path, namespaces, extensions)
- cdef _nsextract_path(self, path_utf):
+ cdef _nsextract_path(self, path):
# replace {namespaces} by new prefixes
cdef int i
- namespaces = {}
+ path_utf = path.encode('UTF-8')
stripped_path = _replace_strings('', path_utf) # remove string literals
+ namespaces = {}
namespace_defs = []
i = 1
for namespace_def in _find_namespaces(stripped_path):
@@ -227,8 +228,11 @@
i = i+1
python.PyList_Append(namespace_defs, namespace_def)
namespace = namespace_def[1:-1] # remove '{}'
+ namespace = python.PyUnicode_FromEncodedObject(
+ namespace, 'UTF-8', 'strict')
python.PyDict_SetItem(namespaces, prefix, namespace)
prefix_str = prefix + ':'
# FIXME: this also replaces {namespaces} within strings!
- path_utf = path_utf.replace(namespace_def, prefix_str)
- return path_utf, namespaces
+ path_utf = path_utf.replace(namespace_def, prefix_str)
+ path = python.PyUnicode_FromEncodedObject(path_utf, 'UTF-8', 'strict')
+ return path, namespaces
From scoder at codespeak.net Thu May 11 15:08:27 2006
From: scoder at codespeak.net (scoder@codespeak.net)
Date: Thu May 11 15:08:29 2006
Subject: [Lxml-checkins] r27076 - lxml/branch/lxml-0.9.x/doc
Message-ID: <20060511130827.D6B2110082@code0.codespeak.net>
Author: scoder
Date: Thu May 11 15:08:26 2006
New Revision: 27076
Modified:
lxml/branch/lxml-0.9.x/doc/main.txt
Log:
main.txt: 0.9.2, link to latest CHANGES.txt in SVN
Modified: lxml/branch/lxml-0.9.x/doc/main.txt
==============================================================================
--- lxml/branch/lxml-0.9.x/doc/main.txt (original)
+++ lxml/branch/lxml-0.9.x/doc/main.txt Thu May 11 15:08:26 2006
@@ -16,6 +16,8 @@
News
----
+* 2006-05-10: `lxml 0.9.2`_ released (`changes for 0.9.2`_)
+
* 2006-03-30: `lxml 0.9.1`_ released (`changes for 0.9.1`_)
* 2006-03-20: `lxml 0.9`_ released (`changes for 0.9`_)
@@ -30,6 +32,8 @@
* 2005-04-08: `lxml 0.5`_ released!
+.. _`lxml 0.9.2`: lxml-0.9.2.tgz
+
.. _`lxml 0.9.1`: lxml-0.9.1.tgz
.. _`lxml 0.9`: lxml-0.9.tgz
@@ -44,6 +48,8 @@
.. _`lxml 0.5`: lxml-0.5.tgz
+.. _`CHANGES for 0.9.2`: changes-0.9.2.html
+
.. _`CHANGES for 0.9.1`: changes-0.9.1.html
.. _`CHANGES for 0.9`: changes-0.9.html
@@ -109,6 +115,8 @@
.. _`lxml at the Python cheeseshop`: http://cheeseshop.python.org/pypi/lxml/
+* `lxml 0.9.2`_ (2006-05-10)
+
* `lxml 0.9.1`_ (2006-03-30)
* `lxml 0.9`_ (2006-03-20)
@@ -136,9 +144,13 @@
svn co http://codespeak.net/svn/lxml/trunk lxml
-You can also `browse it through the web`_.
+You can also `browse it through the web`_. The `latest CHANGES`_ of the
+developer version are also accessible. You can check there if a bug you found
+has been fixed or a feature you want has been implemented in the latest trunk
+version.
.. _`browse it through the web`: http://codespeak.net/svn/lxml
+.. _`latest CHANGES`: http://codespeak.net/svn/lxml/trunk/CHANGES.txt
License
-------
From scoder at codespeak.net Thu May 11 19:24:06 2006
From: scoder at codespeak.net (scoder@codespeak.net)
Date: Thu May 11 19:24:08 2006
Subject: [Lxml-checkins] r27090 - lxml/trunk/src/lxml
Message-ID: <20060511172406.3BF9B10087@code0.codespeak.net>
Author: scoder
Date: Thu May 11 19:24:04 2006
New Revision: 27090
Modified:
lxml/trunk/src/lxml/extensions.pxi
Log:
C-ification in extension function lookup, fast-path for main dictionary check
Modified: lxml/trunk/src/lxml/extensions.pxi
==============================================================================
--- lxml/trunk/src/lxml/extensions.pxi (original)
+++ lxml/trunk/src/lxml/extensions.pxi Thu May 11 19:24:04 2006
@@ -22,6 +22,7 @@
cdef object _namespaces
cdef object _utf_refs
cdef object _function_cache
+ cdef object _function_cache_ns
cdef object _called_function
# for exception handling and temporary reference keeping:
cdef _TempStore _temp_refs
@@ -31,6 +32,7 @@
self._xpathCtxt = NULL
self._utf_refs = {}
self._function_cache = {}
+ self._function_cache_ns = {}
self._called_function = None
if extensions is not None:
@@ -73,6 +75,7 @@
self._doc = doc
self._exc.clear()
python.PyDict_Clear(self._function_cache)
+ python.PyDict_Clear(self._function_cache_ns)
namespaces = self._namespaces
if namespaces is not None:
self.registerNamespaces(namespaces)
@@ -108,23 +111,48 @@
# extension functions
- cdef int _prepare_function_call(self, ns_uri_utf, name_utf):
+ cdef int _prepare_function_call(self, char* c_ns_uri, char* c_name):
+ """Find an extension function and store it in 'self._called_function'.
+ This is absolutely performance-critical for XPath/XSLT!
+ Return 1 if it was found, 0 otherwise.
+ Parameters: c_ns_uri may be NULL, c_name must not be NULL
+ """
+ cdef python.PyObject* c_dict
cdef python.PyObject* dict_result
- key = (ns_uri_utf, name_utf)
- dict_result = python.PyDict_GetItem(self._function_cache, key)
- if dict_result is not NULL:
- function = dict_result
- self._called_function = function
- return function is not None
+ if c_ns_uri is NULL:
+ c_dict = self._function_cache
+ else:
+ c_dict = python.PyDict_GetItemString(
+ self._function_cache_ns, c_ns_uri)
+
+ if c_dict is not NULL:
+ d = c_dict
+ dict_result = python.PyDict_GetItemString(d, c_name)
+ if dict_result is not NULL:
+ function = dict_result
+ self._called_function = function
+ return function is not None
+ else:
+ d = {}
+ python.PyDict_SetItem(self._function_cache_ns, ns_uri_utf, d)
+
+ # first time we look up this function, so the rest is less critical
+ if c_ns_uri is not NULL:
+ ns_uri_utf = c_ns_uri
+ name_utf = c_name
if self._extensions is not None:
- dict_result = python.PyDict_GetItem(self._extensions, key)
+ dict_result = python.PyDict_GetItem(
+ self._extensions, (ns_uri_utf, name_utf))
+ else:
+ dict_result = NULL
if dict_result is not NULL:
function = dict_result
else:
function = _find_extension(ns_uri_utf, name_utf)
- python.PyDict_SetItem(self._function_cache, key, function)
+ # we also store None values here to make sure we remember
+ python.PyDict_SetItem(d, name_utf, function)
self._called_function = function
return function is not None
@@ -180,14 +208,8 @@
char* c_name, char* c_ns_uri):
"Module level lookup function for XPath/XSLT functions"
cdef _BaseContext context
- if c_name is NULL:
- return NULL
- if c_ns_uri is NULL:
- ns_uri = None
- else:
- ns_uri = c_ns_uri
context = <_BaseContext>ctxt
- if context._prepare_function_call(ns_uri, c_name):
+ if context._prepare_function_call(c_ns_uri, c_name):
return _call_prepared_function
else:
return NULL
@@ -289,16 +311,15 @@
cdef _BaseContext context
rctxt = ctxt.context
context = <_BaseContext>(rctxt.userData)
- name = rctxt.function
- if rctxt.functionURI is not NULL:
- uri = rctxt.functionURI
- else:
- uri = None
- if context._prepare_function_call(uri, name):
+ if context._prepare_function_call(rctxt.functionURI, rctxt.function):
_extension_function_call(context, ctxt, nargs)
else:
+ if rctxt.functionURI is not NULL:
+ fref = "{%s}%s" % (rctxt.functionURI, rctxt.function)
+ else:
+ fref = rctxt.function
xpath.xmlXPathErr(ctxt, xpath.XPATH_EXPR_ERROR)
- exception = XPathFunctionError("XPath function {%s}%s not found" % (uri, name))
+ exception = XPathFunctionError("XPath function '%s' not found" % fref)
context._exc._store_exception(exception)
cdef void _call_prepared_function(xpath.xmlXPathParserContext* ctxt, int nargs):
From scoder at codespeak.net Thu May 11 21:34:55 2006
From: scoder at codespeak.net (scoder@codespeak.net)
Date: Thu May 11 21:34:57 2006
Subject: [Lxml-checkins] r27097 - in lxml/trunk: . src/lxml src/lxml/tests
Message-ID: <20060511193455.6A80010098@code0.codespeak.net>
Author: scoder
Date: Thu May 11 21:34:50 2006
New Revision: 27097
Modified:
lxml/trunk/CHANGES.txt
lxml/trunk/src/lxml/etree.pyx
lxml/trunk/src/lxml/tests/test_elementtree.py
lxml/trunk/src/lxml/tests/test_errors.py
lxml/trunk/src/lxml/tree.pxd
Log:
fix: Element/SubElement failed to set attribute namespaces from passed attrib dictionary: namespaces were not even parsed and had to be set /after/ node namespace setup
Modified: lxml/trunk/CHANGES.txt
==============================================================================
--- lxml/trunk/CHANGES.txt (original)
+++ lxml/trunk/CHANGES.txt Thu May 11 21:34:50 2006
@@ -49,6 +49,9 @@
Bugs fixed
----------
+* Element/SubElement failed to set attribute namespaces from passed ``attrib``
+ dictionary
+
* ``tostring()`` now adds an XML declaration for non-ASCII encodings
* ``tostring()`` failed to serialize encodings that contain 0-bytes
Modified: lxml/trunk/src/lxml/etree.pyx
==============================================================================
--- lxml/trunk/src/lxml/etree.pyx (original)
+++ lxml/trunk/src/lxml/etree.pyx Thu May 11 21:34:50 2006
@@ -894,9 +894,10 @@
ns_utf, name_utf = _getNsTag(_tag)
doc = self._doc
c_doc = doc._c_doc
- c_node = _createElement(c_doc, name_utf, attrib, _extra)
+ c_node = _createElement(c_doc, name_utf)
# add namespaces to node if necessary
doc._setNodeNamespaces(c_node, ns_utf, nsmap)
+ _setNodeAttributes(c_node, doc, attrib, _extra)
return _elementFactory(doc, c_node)
def find(self, path):
@@ -1266,26 +1267,36 @@
return cstd.strcmp(c_node.ns.href, self._href) == 0
return 0
-cdef xmlNode* _createElement(xmlDoc* c_doc, object name_utf,
- object attrib, object extra) except NULL:
+cdef xmlNode* _createElement(xmlDoc* c_doc, object name_utf) except NULL:
cdef xmlNode* c_node
+ c_node = tree.xmlNewDocNode(c_doc, NULL, _cstr(name_utf), NULL)
+ return c_node
+
+cdef xmlNode* _createComment(xmlDoc* c_doc, char* text):
+ cdef xmlNode* c_node
+ c_node = tree.xmlNewDocComment(c_doc, text)
+ return c_node
+
+cdef _setNodeAttributes(xmlNode* c_node, _Document doc, attrib, extra):
+ cdef xmlNs* c_ns
+ # 'extra' is not checked here (expected to be a keyword dict)
+ if attrib is not None and not hasattr(attrib, 'items'):
+ raise TypeError, "Invalid attribute dictionary: %s" % type(attrib)
if extra:
if attrib is None:
attrib = extra
else:
attrib.update(extra)
- c_node = tree.xmlNewDocNode(c_doc, NULL, _cstr(name_utf), NULL)
if attrib:
for name, value in attrib.items():
- attr_name_utf = _utf8(name)
+ attr_ns_utf, attr_name_utf = _getNsTag(name)
value_utf = _utf8(value)
- tree.xmlNewProp(c_node, _cstr(attr_name_utf), _cstr(value_utf))
- return c_node
-
-cdef xmlNode* _createComment(xmlDoc* c_doc, char* text):
- cdef xmlNode* c_node
- c_node = tree.xmlNewDocComment(c_doc, text)
- return c_node
+ if attr_ns_utf is None:
+ tree.xmlNewProp(c_node, _cstr(attr_name_utf), _cstr(value_utf))
+ else:
+ c_ns = doc._findOrBuildNodeNs(c_node, _cstr(attr_ns_utf))
+ tree.xmlNewNsProp(c_node, c_ns,
+ _cstr(attr_name_utf), _cstr(value_utf))
# module-level API for ElementTree
@@ -1296,11 +1307,12 @@
cdef _Document doc
ns_utf, name_utf = _getNsTag(_tag)
c_doc = _newDoc()
- c_node = _createElement(c_doc, name_utf, attrib, _extra)
+ c_node = _createElement(c_doc, name_utf)
tree.xmlDocSetRootElement(c_doc, c_node)
doc = _documentFactory(c_doc, None)
# add namespaces to node if necessary
doc._setNodeNamespaces(c_node, ns_utf, nsmap)
+ _setNodeAttributes(c_node, doc, attrib, _extra)
return _elementFactory(doc, c_node)
def Comment(text=None):
@@ -1323,10 +1335,11 @@
cdef _Document doc
ns_utf, name_utf = _getNsTag(_tag)
doc = _parent._doc
- c_node = _createElement(doc._c_doc, name_utf, attrib, _extra)
+ c_node = _createElement(doc._c_doc, name_utf)
tree.xmlAddChild(_parent._c_node, c_node)
# add namespaces to node if necessary
doc._setNodeNamespaces(c_node, ns_utf, nsmap)
+ _setNodeAttributes(c_node, doc, attrib, _extra)
return _elementFactory(doc, c_node)
def ElementTree(_Element element=None, file=None, parser=None):
Modified: lxml/trunk/src/lxml/tests/test_elementtree.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_elementtree.py (original)
+++ lxml/trunk/src/lxml/tests/test_elementtree.py Thu May 11 21:34:50 2006
@@ -476,6 +476,13 @@
result.sort()
self.assertEquals(['alpha', 'beta', 'gamma'], result)
+ def test_element_with_attributes_keywords(self):
+ Element = self.etree.Element
+
+ el = Element('tag', foo='Foo', bar='Bar')
+ self.assertEquals('Foo', el.attrib['foo'])
+ self.assertEquals('Bar', el.attrib['bar'])
+
def test_element_with_attributes(self):
Element = self.etree.Element
@@ -483,13 +490,30 @@
self.assertEquals('Foo', el.attrib['foo'])
self.assertEquals('Bar', el.attrib['bar'])
+ def test_element_with_attributes_ns(self):
+ Element = self.etree.Element
+
+ el = Element('tag', {'{ns1}foo':'Foo', '{ns2}bar':'Bar'})
+ self.assertEquals('Foo', el.attrib['{ns1}foo'])
+ self.assertEquals('Bar', el.attrib['{ns2}bar'])
+
def test_subelement_with_attributes(self):
Element = self.etree.Element
SubElement = self.etree.SubElement
el = Element('tag')
- SubElement(el, 'foo', baz="Baz")
+ SubElement(el, 'foo', attrib={'foo':'Foo'}, baz="Baz")
self.assertEquals("Baz", el[0].attrib['baz'])
+ self.assertEquals('Foo', el[0].attrib['foo'])
+
+ def test_subelement_with_attributes_ns(self):
+ Element = self.etree.Element
+ SubElement = self.etree.SubElement
+
+ el = Element('tag')
+ SubElement(el, 'foo', {'{ns1}foo':'Foo', '{ns2}bar':'Bar'})
+ self.assertEquals('Foo', el[0].attrib['{ns1}foo'])
+ self.assertEquals('Bar', el[0].attrib['{ns2}bar'])
def test_write(self):
ElementTree = self.etree.ElementTree
Modified: lxml/trunk/src/lxml/tests/test_errors.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_errors.py (original)
+++ lxml/trunk/src/lxml/tests/test_errors.py Thu May 11 21:34:50 2006
@@ -14,7 +14,7 @@
def test_bad_element(self):
# attrib argument of Element() should be a dictionary, so if
# we pass a string we should get an error.
- self.assertRaises(AttributeError, self.etree.Element, 'a', 'b')
+ self.assertRaises(TypeError, self.etree.Element, 'a', 'b')
def test_empty_parse(self):
self.assertRaises(etree.XMLSyntaxError, etree.fromstring, '')
Modified: lxml/trunk/src/lxml/tree.pxd
==============================================================================
--- lxml/trunk/src/lxml/tree.pxd (original)
+++ lxml/trunk/src/lxml/tree.pxd Thu May 11 21:34:50 2006
@@ -167,6 +167,8 @@
char* name, char* content)
cdef xmlDoc* xmlNewDoc(char* version)
cdef xmlAttr* xmlNewProp(xmlNode* node, char* name, char* value)
+ cdef xmlAttr* xmlNewNsProp(xmlNode* node, xmlNs* ns,
+ char* name, char* value)
cdef char* xmlGetNoNsProp(xmlNode* node, char* name)
cdef char* xmlGetNsProp(xmlNode* node, char* name, char* nameSpace)
cdef void xmlSetNs(xmlNode* node, xmlNs* ns)
From scoder at codespeak.net Thu May 11 22:37:29 2006
From: scoder at codespeak.net (scoder@codespeak.net)
Date: Thu May 11 22:37:31 2006
Subject: [Lxml-checkins] r27098 - lxml/trunk
Message-ID: <20060511203729.8904B10090@code0.codespeak.net>
Author: scoder
Date: Thu May 11 22:37:27 2006
New Revision: 27098
Modified:
lxml/trunk/bench.py
Log:
allow benchmarks with LARGE trees (-l/-L), bench serialization with attributes
Modified: lxml/trunk/bench.py
==============================================================================
--- lxml/trunk/bench.py (original)
+++ lxml/trunk/bench.py Thu May 11 22:37:27 2006
@@ -2,11 +2,16 @@
from itertools import *
from StringIO import StringIO
-_TEXT = "some ASCII text"
-_UTEXT = u"some klingon: \F8D2"
+TREE_FACTOR = 1 # increase tree size with '-l / '-L' cmd option
+
+_TEXT = "some ASCII text" * 10 * TREE_FACTOR
+_UTEXT = u"some klingon: \F8D2" * 10 * TREE_FACTOR
_ATTRIBUTES = {
- '{attr}test' : _UTEXT,
- 'bla' : _TEXT
+ '{attr}test1' : _UTEXT,
+ '{attr}test2' : _UTEXT,
+ 'bla1' : _TEXT,
+ 'bla2' : _TEXT,
+ 'bla3' : _TEXT
}
def with_attributes(use_attributes):
@@ -125,45 +130,45 @@
return all_trees
def _setup_tree1(self, text, attributes):
- "tree with 26 2nd level and 520 3rd level children"
+ "tree with 26 2nd level and 520 * TREE_FACTOR 3rd level children"
atoz = self.atoz
SubElement = self.etree.SubElement
current_time = time.time
t = current_time()
- root = self.etree.Element('{a}root')
+ root = self.etree.Element('{abc}rootnode')
for ch1 in atoz:
- el = SubElement(root, "{b}"+ch1, attributes)
+ el = SubElement(root, "{bcd}"+ch1*5, attributes)
for ch2 in atoz:
- for i in range(20):
- SubElement(el, "{c}%s%03d" % (ch2, i))
+ for i in range(20 * TREE_FACTOR):
+ SubElement(el, "{cdefg}%s%05d" % (ch2, i))
t = current_time() - t
return (root, t)
def _setup_tree2(self, text, attributes):
- "tree with 520 2nd level and 26 3rd level children"
+ "tree with 520 * TREE_FACTOR 2nd level and 26 3rd level children"
atoz = self.atoz
SubElement = self.etree.SubElement
current_time = time.time
t = current_time()
- root = self.etree.Element('{a}root')
+ root = self.etree.Element('{abc}rootnode')
for ch1 in atoz:
- for i in range(20):
- el = SubElement(root, "{b}"+ch1, attributes)
+ for i in range(20 * TREE_FACTOR):
+ el = SubElement(root, "{bcd}"+ch1*5, attributes)
for ch2 in atoz:
- SubElement(el, "{c}%s%03d" % (ch2, i))
+ SubElement(el, "{cdefg}%s%05d" % (ch2, i))
t = current_time() - t
return (root, t)
def _setup_tree3(self, text, attributes):
- "tree of depth 8 with 3 children per node"
+ "tree of depth 8 + TREE_FACTOR with 3 children per node"
SubElement = self.etree.SubElement
current_time = time.time
t = current_time()
- root = self.etree.Element('{a}root')
+ root = self.etree.Element('{abc}rootnode')
children = [root]
- for i in range(7):
+ for i in range(6 + TREE_FACTOR):
tag_no = count().next
- children = [ SubElement(c, "{b}a%d" % i, attributes)
+ children = [ SubElement(c, "{bcd}a%05d" % i, attributes)
for i,c in enumerate(chain(children, children, children)) ]
t = current_time() - t
return (root, t)
@@ -174,12 +179,12 @@
SubElement = self.etree.SubElement
current_time = time.time
t = current_time()
- root = self.etree.Element('{a}root')
+ root = self.etree.Element('{abc}rootnode')
children = [root]
for ch1 in atoz:
- el = SubElement(root, "{b}"+ch1, attributes)
- SubElement(el, "{c}a", attributes)
- SubElement(el, "{c}b", attributes)
+ el = SubElement(root, "{bcd}"+ch1*5, attributes)
+ SubElement(el, "{cdefg}abcde", attributes)
+ SubElement(el, "{cdefg}bcdef", attributes)
t = current_time() - t
return (root, t)
@@ -249,19 +254,28 @@
for child in reversed(root):
pass
+ @with_attributes(True)
+ @with_attributes(False)
@with_text(text=True, utext=True)
def bench_tostring_utf8(self, root):
self.etree.tostring(root, 'UTF-8')
+ @with_attributes(True)
+ @with_attributes(False)
@with_text(text=True, utext=True)
def bench_tostring_utf16(self, root):
self.etree.tostring(root, 'UTF-16')
+ @with_attributes(True)
+ @with_attributes(False)
@with_text(text=True, utext=True)
def bench_tostring_utf8_unicode_XML(self, root):
xml = unicode(self.etree.tostring(root, 'UTF-8'), 'UTF-8')
+ open("test%03d.txt" % len(root), 'w').write(xml.encode('UTF-8'))
self.etree.XML(xml)
+ @with_attributes(True)
+ @with_attributes(False)
@with_text(text=True, utext=True)
def bench_write_utf8_parse_stringIO(self, root):
f = StringIO()
@@ -495,42 +509,58 @@
if len(sys.argv) > 1:
try:
sys.argv.remove('-i')
+ # run benchmark 'inplace'
sys.path.insert(0, 'src')
except ValueError:
pass
try:
sys.argv.remove('-nolxml')
+ # run without lxml
import_lxml = False
except ValueError:
pass
try:
- sys.argv.remove('-c')
+ sys.argv.remove('-z')
+ # reset callgrind after tree setup
callgrind_zero = True
except ValueError:
pass
+ try:
+ sys.argv.remove('-l')
+ # use large trees
+ TREE_FACTOR *= 2
+ except ValueError:
+ pass
+
+ try:
+ sys.argv.remove('-L')
+ # use LARGE trees
+ TREE_FACTOR *= 2
+ except ValueError:
+ pass
+
_etrees = []
if import_lxml:
from lxml import etree
_etrees.append(etree)
if len(sys.argv) > 1:
- try:
- sys.argv.remove('-a')
- except ValueError:
- pass
- else:
+ if '-a' in sys.argv or '-c' in sys.argv:
+ # 'all' or 'C-implementations' ?
try:
- from elementtree import ElementTree as ET
- _etrees.append(ET)
+ import cElementTree as cET
+ _etrees.append(cET)
except ImportError:
pass
+ if '-a' in sys.argv:
+ # 'all' ?
try:
- import cElementTree as cET
- _etrees.append(cET)
+ from elementtree import ElementTree as ET
+ _etrees.append(ET)
except ImportError:
pass
@@ -551,7 +581,9 @@
if not name.startswith('bench_'):
name = 'bench_' + name
selected.append(name)
- benchmarks = [ [ b for b in bs if b[0] in selected ]
+ benchmarks = [ [ b for b in bs
+ if [ contains for contains in selected
+ if contains in b[0] ] ]
for bs in benchmarks ]
import time
From scoder at codespeak.net Fri May 12 06:06:56 2006
From: scoder at codespeak.net (scoder@codespeak.net)
Date: Fri May 12 06:06:58 2006
Subject: [Lxml-checkins] r27102 - lxml/trunk/src/lxml
Message-ID: <20060512040656.9902810090@code0.codespeak.net>
Author: scoder
Date: Fri May 12 06:06:54 2006
New Revision: 27102
Modified:
lxml/trunk/src/lxml/etree.pyx
Log:
_setNodeAttributes -> _initNodeAttributes - make clear what it is meant to do
Modified: lxml/trunk/src/lxml/etree.pyx
==============================================================================
--- lxml/trunk/src/lxml/etree.pyx (original)
+++ lxml/trunk/src/lxml/etree.pyx Fri May 12 06:06:54 2006
@@ -897,7 +897,7 @@
c_node = _createElement(c_doc, name_utf)
# add namespaces to node if necessary
doc._setNodeNamespaces(c_node, ns_utf, nsmap)
- _setNodeAttributes(c_node, doc, attrib, _extra)
+ _initNodeAttributes(c_node, doc, attrib, _extra)
return _elementFactory(doc, c_node)
def find(self, path):
@@ -1277,7 +1277,7 @@
c_node = tree.xmlNewDocComment(c_doc, text)
return c_node
-cdef _setNodeAttributes(xmlNode* c_node, _Document doc, attrib, extra):
+cdef _initNodeAttributes(xmlNode* c_node, _Document doc, attrib, extra):
cdef xmlNs* c_ns
# 'extra' is not checked here (expected to be a keyword dict)
if attrib is not None and not hasattr(attrib, 'items'):
@@ -1312,7 +1312,7 @@
doc = _documentFactory(c_doc, None)
# add namespaces to node if necessary
doc._setNodeNamespaces(c_node, ns_utf, nsmap)
- _setNodeAttributes(c_node, doc, attrib, _extra)
+ _initNodeAttributes(c_node, doc, attrib, _extra)
return _elementFactory(doc, c_node)
def Comment(text=None):
@@ -1339,7 +1339,7 @@
tree.xmlAddChild(_parent._c_node, c_node)
# add namespaces to node if necessary
doc._setNodeNamespaces(c_node, ns_utf, nsmap)
- _setNodeAttributes(c_node, doc, attrib, _extra)
+ _initNodeAttributes(c_node, doc, attrib, _extra)
return _elementFactory(doc, c_node)
def ElementTree(_Element element=None, file=None, parser=None):
From scoder at codespeak.net Fri May 12 07:45:07 2006
From: scoder at codespeak.net (scoder@codespeak.net)
Date: Fri May 12 07:45:09 2006
Subject: [Lxml-checkins] r27103 - in lxml/branch/lxml-0.9.x: . src/lxml
src/lxml/tests
Message-ID: <20060512054507.561A210090@code0.codespeak.net>
Author: scoder
Date: Fri May 12 07:45:05 2006
New Revision: 27103
Modified:
lxml/branch/lxml-0.9.x/CHANGES.txt
lxml/branch/lxml-0.9.x/src/lxml/etree.pyx
lxml/branch/lxml-0.9.x/src/lxml/tests/test_elementtree.py
lxml/branch/lxml-0.9.x/src/lxml/tests/test_errors.py
lxml/branch/lxml-0.9.x/src/lxml/tree.pxd
Log:
merged in Element attribute initialization bugfix from trunk
Modified: lxml/branch/lxml-0.9.x/CHANGES.txt
==============================================================================
--- lxml/branch/lxml-0.9.x/CHANGES.txt (original)
+++ lxml/branch/lxml-0.9.x/CHANGES.txt Fri May 12 07:45:05 2006
@@ -1,6 +1,18 @@
lxml changelog
==============
+current
+=======
+
+Features added
+--------------
+
+Bugs fixed
+----------
+
+* Element/SubElement failed to set attribute namespaces from passed ``attrib``
+ dictionary
+
0.9.2 (2006-05-10)
==================
Modified: lxml/branch/lxml-0.9.x/src/lxml/etree.pyx
==============================================================================
--- lxml/branch/lxml-0.9.x/src/lxml/etree.pyx (original)
+++ lxml/branch/lxml-0.9.x/src/lxml/etree.pyx Fri May 12 07:45:05 2006
@@ -763,9 +763,10 @@
ns_utf, name_utf = _getNsTag(_tag)
doc = self._doc
c_doc = doc._c_doc
- c_node = _createElement(c_doc, name_utf, attrib, _extra)
+ c_node = _createElement(c_doc, name_utf)
# add namespaces to node if necessary
doc._setNodeNamespaces(c_node, ns_utf, nsmap)
+ _initNodeAttributes(c_node, doc, attrib, _extra)
return _elementFactory(doc, c_node)
def find(self, path):
@@ -1114,26 +1115,35 @@
return tree.strcmp(c_node.ns.href, self._href) == 0
return 0
-cdef xmlNode* _createElement(xmlDoc* c_doc, object name_utf,
- object attrib, object extra) except NULL:
+cdef xmlNode* _createElement(xmlDoc* c_doc, object name_utf) except NULL:
cdef xmlNode* c_node
+ c_node = tree.xmlNewDocNode(c_doc, NULL, name_utf, NULL)
+ return c_node
+
+cdef xmlNode* _createComment(xmlDoc* c_doc, char* text):
+ cdef xmlNode* c_node
+ c_node = tree.xmlNewDocComment(c_doc, text)
+ return c_node
+
+cdef _initNodeAttributes(xmlNode* c_node, _Document doc, attrib, extra):
+ cdef xmlNs* c_ns
+ # 'extra' is not checked here (expected to be a keyword dict)
+ if attrib is not None and not hasattr(attrib, 'items'):
+ raise TypeError, "Invalid attribute dictionary: %s" % type(attrib)
if extra:
if attrib is None:
attrib = extra
else:
attrib.update(extra)
- c_node = tree.xmlNewDocNode(c_doc, NULL, name_utf, NULL)
if attrib:
for name, value in attrib.items():
- attr_name_utf = _utf8(name)
+ attr_ns_utf, attr_name_utf = _getNsTag(name)
value_utf = _utf8(value)
- tree.xmlNewProp(c_node, attr_name_utf, value_utf)
- return c_node
-
-cdef xmlNode* _createComment(xmlDoc* c_doc, char* text):
- cdef xmlNode* c_node
- c_node = tree.xmlNewDocComment(c_doc, text)
- return c_node
+ if attr_ns_utf is None:
+ tree.xmlNewProp(c_node, attr_name_utf, value_utf)
+ else:
+ c_ns = doc._findOrBuildNodeNs(c_node, attr_ns_utf)
+ tree.xmlNewNsProp(c_node, c_ns, attr_name_utf, value_utf)
# module-level API for ElementTree
@@ -1144,11 +1154,12 @@
cdef _Document doc
ns_utf, name_utf = _getNsTag(_tag)
c_doc = theParser.newDoc()
- c_node = _createElement(c_doc, name_utf, attrib, _extra)
+ c_node = _createElement(c_doc, name_utf)
tree.xmlDocSetRootElement(c_doc, c_node)
doc = _documentFactory(c_doc)
# add namespaces to node if necessary
doc._setNodeNamespaces(c_node, ns_utf, nsmap)
+ _initNodeAttributes(c_node, doc, attrib, _extra)
return _elementFactory(doc, c_node)
def Comment(text=None):
@@ -1169,10 +1180,11 @@
_raiseIfNone(_parent)
ns_utf, name_utf = _getNsTag(_tag)
doc = _parent._doc
- c_node = _createElement(doc._c_doc, name_utf, attrib, _extra)
+ c_node = _createElement(doc._c_doc, name_utf)
tree.xmlAddChild(_parent._c_node, c_node)
# add namespaces to node if necessary
doc._setNodeNamespaces(c_node, ns_utf, nsmap)
+ _initNodeAttributes(c_node, doc, attrib, _extra)
return _elementFactory(doc, c_node)
def ElementTree(_Element element=None, file=None, parser=None):
Modified: lxml/branch/lxml-0.9.x/src/lxml/tests/test_elementtree.py
==============================================================================
--- lxml/branch/lxml-0.9.x/src/lxml/tests/test_elementtree.py (original)
+++ lxml/branch/lxml-0.9.x/src/lxml/tests/test_elementtree.py Fri May 12 07:45:05 2006
@@ -475,6 +475,13 @@
result.sort()
self.assertEquals(['alpha', 'beta', 'gamma'], result)
+ def test_element_with_attributes_keywords(self):
+ Element = self.etree.Element
+
+ el = Element('tag', foo='Foo', bar='Bar')
+ self.assertEquals('Foo', el.attrib['foo'])
+ self.assertEquals('Bar', el.attrib['bar'])
+
def test_element_with_attributes(self):
Element = self.etree.Element
@@ -482,13 +489,30 @@
self.assertEquals('Foo', el.attrib['foo'])
self.assertEquals('Bar', el.attrib['bar'])
+ def test_element_with_attributes_ns(self):
+ Element = self.etree.Element
+
+ el = Element('tag', {'{ns1}foo':'Foo', '{ns2}bar':'Bar'})
+ self.assertEquals('Foo', el.attrib['{ns1}foo'])
+ self.assertEquals('Bar', el.attrib['{ns2}bar'])
+
def test_subelement_with_attributes(self):
Element = self.etree.Element
SubElement = self.etree.SubElement
el = Element('tag')
- SubElement(el, 'foo', baz="Baz")
+ SubElement(el, 'foo', attrib={'foo':'Foo'}, baz="Baz")
self.assertEquals("Baz", el[0].attrib['baz'])
+ self.assertEquals('Foo', el[0].attrib['foo'])
+
+ def test_subelement_with_attributes_ns(self):
+ Element = self.etree.Element
+ SubElement = self.etree.SubElement
+
+ el = Element('tag')
+ SubElement(el, 'foo', {'{ns1}foo':'Foo', '{ns2}bar':'Bar'})
+ self.assertEquals('Foo', el[0].attrib['{ns1}foo'])
+ self.assertEquals('Bar', el[0].attrib['{ns2}bar'])
def test_write(self):
ElementTree = self.etree.ElementTree
Modified: lxml/branch/lxml-0.9.x/src/lxml/tests/test_errors.py
==============================================================================
--- lxml/branch/lxml-0.9.x/src/lxml/tests/test_errors.py (original)
+++ lxml/branch/lxml-0.9.x/src/lxml/tests/test_errors.py Fri May 12 07:45:05 2006
@@ -14,7 +14,7 @@
def test_bad_element(self):
# attrib argument of Element() should be a dictionary, so if
# we pass a string we should get an error.
- self.assertRaises(AttributeError, self.etree.Element, 'a', 'b')
+ self.assertRaises(TypeError, self.etree.Element, 'a', 'b')
def test_empty_parse(self):
self.assertRaises(etree.XMLSyntaxError, etree.fromstring, '')
Modified: lxml/branch/lxml-0.9.x/src/lxml/tree.pxd
==============================================================================
--- lxml/branch/lxml-0.9.x/src/lxml/tree.pxd (original)
+++ lxml/branch/lxml-0.9.x/src/lxml/tree.pxd Fri May 12 07:45:05 2006
@@ -115,6 +115,8 @@
char* name, char* content)
cdef xmlDoc* xmlNewDoc(char* version)
cdef xmlAttr* xmlNewProp(xmlNode* node, char* name, char* value)
+ cdef xmlAttr* xmlNewNsProp(xmlNode* node, xmlNs* ns,
+ char* name, char* value)
cdef char* xmlGetNoNsProp(xmlNode* node, char* name)
cdef char* xmlGetNsProp(xmlNode* node, char* name, char* nameSpace)
cdef void xmlSetNs(xmlNode* node, xmlNs* ns)
From scoder at codespeak.net Fri May 12 16:18:29 2006
From: scoder at codespeak.net (scoder@codespeak.net)
Date: Fri May 12 16:18:30 2006
Subject: [Lxml-checkins] r27132 - lxml/trunk/src/lxml/tests
Message-ID: <20060512141829.7CD5E100B7@code0.codespeak.net>
Author: scoder
Date: Fri May 12 16:18:28 2006
New Revision: 27132
Modified:
lxml/trunk/src/lxml/tests/test_elementtree.py
Log:
test cases for Element.findall()
Modified: lxml/trunk/src/lxml/tests/test_elementtree.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_elementtree.py (original)
+++ lxml/trunk/src/lxml/tests/test_elementtree.py Fri May 12 16:18:28 2006
@@ -476,6 +476,23 @@
result.sort()
self.assertEquals(['alpha', 'beta', 'gamma'], result)
+ def test_findall(self):
+ XML = self.etree.XML
+ root = XML('<a><b><c/></b><b/><c><b/></c></a>')
+ self.assertEquals(len(root.findall("c")), 1)
+ self.assertEquals(len(root.findall(".//c")), 2)
+ self.assertEquals(len(root.findall(".//b")), 3)
+ self.assertEquals(len(root.findall(".//b")[0]), 1)
+ self.assertEquals(len(root.findall(".//b")[1]), 0)
+ self.assertEquals(len(root.findall(".//b")[2]), 0)
+
+ def test_findall_ns(self):
+ XML = self.etree.XML
+ root = XML('<a xmlns:x="X" xmlns:y="Y"><x:b><c/></x:b><b/><c><x:b/><b/></c><b/></a>')
+ self.assertEquals(len(root.findall(".//{X}b")), 2)
+ self.assertEquals(len(root.findall(".//b")), 3)
+ self.assertEquals(len(root.findall("b")), 2)
+
def test_element_with_attributes_keywords(self):
Element = self.etree.Element
@@ -1107,7 +1124,7 @@
list(a.getiterator('a')))
self.assertEquals(
[a2],
- list(e.getiterator('a')))
+ list(c.getiterator('a')))
def test_getiterator_with_text(self):
Element = self.etree.Element
From scoder at codespeak.net Fri May 12 16:26:30 2006
From: scoder at codespeak.net (scoder@codespeak.net)
Date: Fri May 12 16:26:31 2006
Subject: [Lxml-checkins] r27133 - in lxml/trunk: . src/lxml
Message-ID: <20060512142630.2BED0100B4@code0.codespeak.net>
Author: scoder
Date: Fri May 12 16:26:28 2006
New Revision: 27133
Modified:
lxml/trunk/CHANGES.txt
lxml/trunk/src/lxml/apihelpers.pxi
lxml/trunk/src/lxml/etree.pyx
lxml/trunk/src/lxml/python.pxd
Log:
rewrite of ElementDepthFirstIterator to support tag selection: complete support for Element.getiterator()
Modified: lxml/trunk/CHANGES.txt
==============================================================================
--- lxml/trunk/CHANGES.txt (original)
+++ lxml/trunk/CHANGES.txt Fri May 12 16:26:28 2006
@@ -7,6 +7,8 @@
Features added
--------------
+* Speedup of Element.findall(tag) and Element.getiterator(tag)
+
* Support for writing the XML representation of Elements and ElementTrees to
Python unicode strings via ``etree.tounicode()``
Modified: lxml/trunk/src/lxml/apihelpers.pxi
==============================================================================
--- lxml/trunk/src/lxml/apihelpers.pxi (original)
+++ lxml/trunk/src/lxml/apihelpers.pxi Fri May 12 16:26:28 2006
@@ -281,8 +281,7 @@
return c_child
c = c + 1
c_child = c_child.next
- else:
- return NULL
+ return NULL
cdef xmlNode* _findChildBackwards(xmlNode* c_node, Py_ssize_t index):
"""Return child element of c_node with index, or return NULL if not found.
@@ -298,8 +297,7 @@
return c_child
c = c + 1
c_child = c_child.prev
- else:
- return NULL
+ return NULL
cdef xmlNode* _nextElement(xmlNode* c_node):
"""Given a node, find the next sibling that is an element.
@@ -321,6 +319,59 @@
c_node = c_node.prev
return NULL
+cdef xmlNode* _findDepthFirstInDescendents(xmlNode* c_node,
+ char* c_href, char* c_name):
+ if c_node is NULL:
+ return NULL
+ c_node = c_node.children
+ if c_node is NULL:
+ return NULL
+ if not _isElement(c_node):
+ c_node = _nextElement(c_node)
+ return _findDepthFirstInFollowing(c_node, c_href, c_name)
+
+cdef xmlNode* _findDepthFirstInFollowingSiblings(xmlNode* c_node,
+ char* c_href, char* c_name):
+ if c_node is NULL:
+ return NULL
+ c_node = _nextElement(c_node)
+ return _findDepthFirstInFollowing(c_node, c_href, c_name)
+
+cdef xmlNode* _findDepthFirstInFollowing(xmlNode* c_node,
+ char* c_href, char* c_name):
+ """Find the next matching node by traversing:
+ 1) the node itself
+ 2) its descendents
+ 3) its following siblings.
+ """
+ cdef xmlNode* c_child
+ if c_name is NULL:
+ # always match
+ return c_node
+ while c_node is not NULL:
+ if _tagMatches(c_node, c_href, c_name):
+ return c_node
+ if c_node.children is not NULL:
+ c_child = _findDepthFirstInFollowing(c_node.children, c_href, c_name)
+ if c_child is not NULL:
+ return c_child
+ c_node = _nextElement(c_node)
+ return NULL
+
+cdef int _tagMatches(xmlNode* c_node, char* c_href, char* c_name):
+ if c_name is NULL:
+ # always match
+ return 1
+ if c_href is NULL:
+ if c_node.ns is not NULL and c_node.ns.href is not NULL:
+ return 0
+ return cstd.strcmp(c_node.name, c_name) == 0
+ elif c_node.ns is NULL or c_node.ns.href is NULL:
+ return 0
+ else:
+ return cstd.strcmp(c_node.name, c_name) == 0 and \
+ cstd.strcmp(c_node.ns.href, c_href) == 0
+
cdef void _removeNode(xmlNode* c_node):
"""Unlink and free a node and subnodes if possible.
"""
Modified: lxml/trunk/src/lxml/etree.pyx
==============================================================================
--- lxml/trunk/src/lxml/etree.pyx (original)
+++ lxml/trunk/src/lxml/etree.pyx Fri May 12 16:26:28 2006
@@ -879,11 +879,9 @@
return None
def getiterator(self, tag=None):
- iterator = ElementDepthFirstIterator(self)
- if tag is None or tag == '*':
- return iterator
- else:
- return ElementTagFilter(iterator, tag)
+ if tag == '*':
+ tag = None
+ return ElementDepthFirstIterator(self, tag)
def makeelement(self, _tag, attrib=None, nsmap=None, **_extra):
"Creates a new element associated with the same document."
@@ -1194,78 +1192,85 @@
cdef class ElementDepthFirstIterator:
"""Iterates over an element and its sub-elements in document order (depth
- first pre-order)."""
+ first pre-order).
+
+ If the 'tag' argument is not None, it returns only the elements that match
+ the respective name and namespace.
+ """
# we keep Python references here to control GC
# keep next node to return and a stack of position state in the tree
- cdef object _stack
+ cdef object _pystrings
+ cdef char* _href
+ cdef char* _name
+ cdef Py_ssize_t _depth
cdef _NodeBase _next_node
- def __init__(self, _NodeBase node not None):
- cdef xmlNode* c_node
+ def __init__(self, _NodeBase node not None, tag=None):
self._next_node = node
- self._stack = []
- self._findAndPushNextNode(node)
+ self._depth = 0
+
+ if tag is None:
+ self._href = NULL
+ self._name = NULL
+ else:
+ self._pystrings = _getNsTag(tag)
+ if self._pystrings[0] is None:
+ self._href = NULL
+ else:
+ self._href = _cstr(self._pystrings[0])
+ self._name = _cstr(self._pystrings[1])
+
+ if not _tagMatches(node._c_node, self._href, self._name):
+ # this cannot raise StopIteration, self._next_node != None
+ self.next()
+
def __iter__(self):
return self
+
def __next__(self):
- cdef xmlNode* c_node
- cdef _NodeBase next_node
+ cdef _NodeBase current_node
current_node = self._next_node
if current_node is None:
raise StopIteration
- stack = self._stack
- if python.PyList_GET_SIZE(stack) == 0:
- self._next_node = None
- return current_node
- next_node = stack[-1]
- self._next_node = next_node
- self._findAndPushNextNode(next_node)
+ self._findAndPushNextNode()
return current_node
- cdef void _findAndPushNextNode(self, _NodeBase node):
+ cdef void _findAndPushNextNode(self):
+ cdef _NodeBase node
cdef xmlNode* c_node
- stack = self._stack
- # try next child level until we hit a leaf
- c_node = _findChildForwards(node._c_node, 0)
+ cdef xmlNode* c_parent
+ # find in descendants
+ node = self._next_node
+ c_parent = node._c_node
+ c_node = _findDepthFirstInDescendents(c_parent, self._href, self._name)
if c_node is NULL:
- pop = stack.pop
- while c_node is NULL and python.PyList_GET_SIZE(stack):
- # walk up the stack until we find a sibling
- node = pop()
- c_node = _nextElement(node._c_node)
- if c_node is not NULL:
- python.PyList_Append(
- stack, _elementFactory(node._doc, c_node))
-
-cdef class ElementTagFilter:
- cdef object _iterator
- cdef object _pystrings
- cdef char* _href
- cdef char* _name
- def __init__(self, element_iterator, tag):
- self._iterator = iter(element_iterator)
- ns_href, name = _getNsTag(tag)
- self._pystrings = (ns_href, name) # keep Python references
- self._name = _cstr(name)
- if ns_href is None:
- self._href = NULL
- else:
- self._href = _cstr(ns_href)
- def __iter__(self):
- return self
- def __next__(self):
- cdef _NodeBase node
- while 1:
- node = self._iterator.next()
- if self._tagMatches(node._c_node):
- return node
-
- cdef int _tagMatches(self, xmlNode* c_node):
- if cstd.strcmp(c_node.name, self._name) == 0:
- if c_node.ns == NULL or c_node.ns.href == NULL:
- return self._href == NULL
- else:
- return cstd.strcmp(c_node.ns.href, self._href) == 0
- return 0
+ if self._depth < 1:
+ # nothing left to traverse
+ self._next_node = None
+ return
+ # try siblings
+ c_node = _findDepthFirstInFollowingSiblings(
+ c_parent, self._href, self._name)
+
+ while c_node is NULL and self._depth > 1:
+ # walk up the parent pointers and continue with siblings
+ c_parent = c_parent.parent
+ self._depth = self._depth - 1
+ if c_parent is NULL or not _isElement(c_parent):
+ break
+ c_node = _findDepthFirstInFollowingSiblings(
+ c_parent, self._href, self._name)
+
+ if c_node is NULL:
+ self._next_node = None
+ return # all found, nothing left
+ # we are at a sibling, so set c_parent to our parent
+ c_parent = c_parent.parent
+
+ self._next_node = _elementFactory(node._doc, c_node)
+ # fix depth counter by looking up path to original parent
+ while c_node is not c_parent:
+ self._depth = self._depth + 1
+ c_node = c_node.parent
cdef xmlNode* _createElement(xmlDoc* c_doc, object name_utf) except NULL:
cdef xmlNode* c_node
Modified: lxml/trunk/src/lxml/python.pxd
==============================================================================
--- lxml/trunk/src/lxml/python.pxd (original)
+++ lxml/trunk/src/lxml/python.pxd Fri May 12 16:26:28 2006
@@ -30,6 +30,8 @@
cdef Py_ssize_t PyList_GET_SIZE(object l)
cdef int PyList_Append(object l, object obj)
cdef int PyList_Reverse(object l)
+ cdef int PyList_Insert(object l, Py_ssize_t index, object o)
+ cdef object PyList_GET_ITEM(object l, Py_ssize_t index)
cdef int PyDict_SetItemString(object d, char* key, object value)
cdef int PyDict_SetItem(object d, object key, object value)
cdef PyObject* PyDict_GetItemString(object d, char* key)
From scoder at codespeak.net Fri May 12 16:34:53 2006
From: scoder at codespeak.net (scoder@codespeak.net)
Date: Fri May 12 16:34:55 2006
Subject: [Lxml-checkins] r27135 - lxml/trunk/src/lxml
Message-ID: <20060512143453.CF773100B4@code0.codespeak.net>
Author: scoder
Date: Fri May 12 16:34:52 2006
New Revision: 27135
Modified:
lxml/trunk/src/lxml/etree.pyx
Log:
moved special case for (tag == '*') from getiterator() into iterator class
Modified: lxml/trunk/src/lxml/etree.pyx
==============================================================================
--- lxml/trunk/src/lxml/etree.pyx (original)
+++ lxml/trunk/src/lxml/etree.pyx Fri May 12 16:34:52 2006
@@ -879,8 +879,6 @@
return None
def getiterator(self, tag=None):
- if tag == '*':
- tag = None
return ElementDepthFirstIterator(self, tag)
def makeelement(self, _tag, attrib=None, nsmap=None, **_extra):
@@ -1207,7 +1205,8 @@
def __init__(self, _NodeBase node not None, tag=None):
self._next_node = node
self._depth = 0
-
+ if tag == '*':
+ tag = None
if tag is None:
self._href = NULL
self._name = NULL
From scoder at codespeak.net Fri May 12 17:30:07 2006
From: scoder at codespeak.net (scoder@codespeak.net)
Date: Fri May 12 17:30:07 2006
Subject: [Lxml-checkins] r27141 - lxml/trunk/src/lxml
Message-ID: <20060512153007.185A81007F@code0.codespeak.net>
Author: scoder
Date: Fri May 12 17:30:05 2006
New Revision: 27141
Modified:
lxml/trunk/src/lxml/etree.pyx
Log:
doc clarifications, fixed name of _findAndPushNextNode to _prepareNextNode
Modified: lxml/trunk/src/lxml/etree.pyx
==============================================================================
--- lxml/trunk/src/lxml/etree.pyx (original)
+++ lxml/trunk/src/lxml/etree.pyx Fri May 12 17:30:05 2006
@@ -1192,8 +1192,11 @@
"""Iterates over an element and its sub-elements in document order (depth
first pre-order).
- If the 'tag' argument is not None, it returns only the elements that match
- the respective name and namespace.
+ If the optional 'tag' argument is not None, it returns only the elements
+ that match the respective name and namespace.
+
+ Note that the behaviour of this iterator is completely undefined if the
+ tree it traverses is modified during iteration.
"""
# we keep Python references here to control GC
# keep next node to return and a stack of position state in the tree
@@ -1230,10 +1233,10 @@
current_node = self._next_node
if current_node is None:
raise StopIteration
- self._findAndPushNextNode()
+ self._prepareNextNode()
return current_node
- cdef void _findAndPushNextNode(self):
+ cdef void _prepareNextNode(self):
cdef _NodeBase node
cdef xmlNode* c_node
cdef xmlNode* c_parent
@@ -1251,7 +1254,7 @@
c_parent, self._href, self._name)
while c_node is NULL and self._depth > 1:
- # walk up the parent pointers and continue with siblings
+ # walk up the parent pointers and continue with their siblings
c_parent = c_parent.parent
self._depth = self._depth - 1
if c_parent is NULL or not _isElement(c_parent):
From scoder at codespeak.net Fri May 12 18:03:23 2006
From: scoder at codespeak.net (scoder@codespeak.net)
Date: Fri May 12 18:03:24 2006
Subject: [Lxml-checkins] r27143 - lxml/branch/lxml-0.9.x/doc
Message-ID: <20060512160323.1A3CA100BD@code0.codespeak.net>
Author: scoder
Date: Fri May 12 18:03:21 2006
New Revision: 27143
Modified:
lxml/branch/lxml-0.9.x/doc/main.txt
Log:
doc updates as encouraged by David Sankel
Modified: lxml/branch/lxml-0.9.x/doc/main.txt
==============================================================================
--- lxml/branch/lxml-0.9.x/doc/main.txt (original)
+++ lxml/branch/lxml-0.9.x/doc/main.txt Fri May 12 18:03:21 2006
@@ -16,37 +16,21 @@
News
----
-* 2006-05-10: `lxml 0.9.2`_ released (`changes for 0.9.2`_)
+* 2006-05-10: lxml 0.9.2 released (`changes for 0.9.2`_)
-* 2006-03-30: `lxml 0.9.1`_ released (`changes for 0.9.1`_)
+* 2006-03-30: lxml 0.9.1 released (`changes for 0.9.1`_)
-* 2006-03-20: `lxml 0.9`_ released (`changes for 0.9`_)
+* 2006-03-20: lxml 0.9 released (`changes for 0.9`_)
-* 2005-11-03: `lxml 0.8`_ released (`changes for 0.8`_)
+* 2005-11-03: lxml 0.8 released (`changes for 0.8`_)
-* 2005-06-15: `lxml 0.7`_ released (`changes for 0.7`_)
+* 2005-06-15: lxml 0.7 released (`changes for 0.7`_)
-* 2005-05-14: `lxml 0.6`_ released (`changes for 0.6`_)
+* 2005-05-14: lxml 0.6 released (`changes for 0.6`_)
-* 2005-04-09: `lxml 0.5.1`_ released (`changes for 0.5.1`_)
+* 2005-04-09: lxml 0.5.1 released (`changes for 0.5.1`_)
-* 2005-04-08: `lxml 0.5`_ released!
-
-.. _`lxml 0.9.2`: lxml-0.9.2.tgz
-
-.. _`lxml 0.9.1`: lxml-0.9.1.tgz
-
-.. _`lxml 0.9`: lxml-0.9.tgz
-
-.. _`lxml 0.8`: lxml-0.8.tgz
-
-.. _`lxml 0.7`: lxml-0.7.tgz
-
-.. _`lxml 0.6`: lxml-0.6.tgz
-
-.. _`lxml 0.5.1`: lxml-0.5.1.tgz
-
-.. _`lxml 0.5`: lxml-0.5.tgz
+* 2005-04-08: lxml 0.5 released!
.. _`CHANGES for 0.9.2`: changes-0.9.2.html
@@ -113,8 +97,15 @@
Download
--------
+The best way to download binary versions is to visit `lxml at the Python
+cheeseshop`_. It has the source, eggs and installers for various platforms.
+
.. _`lxml at the Python cheeseshop`: http://cheeseshop.python.org/pypi/lxml/
+Please take a look at the `installation instructions`_!
+
+.. _`installation instructions`: installation.html
+
* `lxml 0.9.2`_ (2006-05-10)
* `lxml 0.9.1`_ (2006-03-30)
@@ -131,13 +122,21 @@
* `lxml 0.5`_ (2005-04-08)
-Instead of downloading the source here, you can also find `lxml at the
-Python cheeseshop`_ in source, egg and installer form for various
-platforms.
+.. _`lxml 0.9.2`: lxml-0.9.2.tgz
-See also the `installation instructions`_.
+.. _`lxml 0.9.1`: lxml-0.9.1.tgz
-.. _`installation instructions`: installation.html
+.. _`lxml 0.9`: lxml-0.9.tgz
+
+.. _`lxml 0.8`: lxml-0.8.tgz
+
+.. _`lxml 0.7`: lxml-0.7.tgz
+
+.. _`lxml 0.6`: lxml-0.6.tgz
+
+.. _`lxml 0.5.1`: lxml-0.5.1.tgz
+
+.. _`lxml 0.5`: lxml-0.5.tgz
It's also possible to check out the latest development version of lxml
from svn directly, using a command like this::
From scoder at codespeak.net Fri May 12 18:48:57 2006
From: scoder at codespeak.net (scoder@codespeak.net)
Date: Fri May 12 18:48:59 2006
Subject: [Lxml-checkins] r27145 - lxml/branch/lxml-0.9.x/doc
Message-ID: <20060512164857.1832E100BA@code0.codespeak.net>
Author: scoder
Date: Fri May 12 18:48:55 2006
New Revision: 27145
Modified:
lxml/branch/lxml-0.9.x/doc/main.txt
Log:
merged news section into download section
Modified: lxml/branch/lxml-0.9.x/doc/main.txt
==============================================================================
--- lxml/branch/lxml-0.9.x/doc/main.txt (original)
+++ lxml/branch/lxml-0.9.x/doc/main.txt Fri May 12 18:48:55 2006
@@ -13,24 +13,49 @@
.. _introduction: intro.html
-News
-----
+Download
+--------
+
+The best way to download binary versions is to visit `lxml at the Python
+cheeseshop`_. It has the source, eggs and installers for various platforms.
+
+.. _`lxml at the Python cheeseshop`: http://cheeseshop.python.org/pypi/lxml/
+
+Please take a look at the `installation instructions`_!
+
+.. _`installation instructions`: installation.html
+
+* `lxml 0.9.2`_, released 2006-05-10 (`changes for 0.9.2`_)
-* 2006-05-10: lxml 0.9.2 released (`changes for 0.9.2`_)
+* `lxml 0.9.1`_, released 2006-03-30 (`changes for 0.9.1`_)
-* 2006-03-30: lxml 0.9.1 released (`changes for 0.9.1`_)
+* `lxml 0.9`_, released 2006-03-20 (`changes for 0.9`_)
-* 2006-03-20: lxml 0.9 released (`changes for 0.9`_)
+* `lxml 0.8`_, released 2005-11-03 (`changes for 0.8`_)
-* 2005-11-03: lxml 0.8 released (`changes for 0.8`_)
+* `lxml 0.7`_, released 2005-06-15 (`changes for 0.7`_)
-* 2005-06-15: lxml 0.7 released (`changes for 0.7`_)
+* `lxml 0.6`_, released 2005-05-14 (`changes for 0.6`_)
-* 2005-05-14: lxml 0.6 released (`changes for 0.6`_)
+* `lxml 0.5.1`_, released 2005-04-09 (`changes for 0.5.1`_)
-* 2005-04-09: lxml 0.5.1 released (`changes for 0.5.1`_)
+* `lxml 0.5`_, released 2005-04-08
-* 2005-04-08: lxml 0.5 released!
+.. _`lxml 0.9.2`: lxml-0.9.2.tgz
+
+.. _`lxml 0.9.1`: lxml-0.9.1.tgz
+
+.. _`lxml 0.9`: lxml-0.9.tgz
+
+.. _`lxml 0.8`: lxml-0.8.tgz
+
+.. _`lxml 0.7`: lxml-0.7.tgz
+
+.. _`lxml 0.6`: lxml-0.6.tgz
+
+.. _`lxml 0.5.1`: lxml-0.5.1.tgz
+
+.. _`lxml 0.5`: lxml-0.5.tgz
.. _`CHANGES for 0.9.2`: changes-0.9.2.html
@@ -46,6 +71,19 @@
.. _`CHANGES for 0.5.1`: changes-0.5.1.html
+It's also possible to check out the latest development version of lxml
+from svn directly, using a command like this::
+
+ svn co http://codespeak.net/svn/lxml/trunk lxml
+
+You can also `browse it through the web`_. The `latest CHANGES`_ of the
+developer version are also accessible. You can check there if a bug you found
+has been fixed or a feature you want has been implemented in the latest trunk
+version.
+
+.. _`browse it through the web`: http://codespeak.net/svn/lxml
+.. _`latest CHANGES`: http://codespeak.net/svn/lxml/trunk/CHANGES.txt
+
Documentation
-------------
@@ -94,63 +132,6 @@
.. _`mailing list`: http://codespeak.net/mailman/listinfo/lxml-dev
-Download
---------
-
-The best way to download binary versions is to visit `lxml at the Python
-cheeseshop`_. It has the source, eggs and installers for various platforms.
-
-.. _`lxml at the Python cheeseshop`: http://cheeseshop.python.org/pypi/lxml/
-
-Please take a look at the `installation instructions`_!
-
-.. _`installation instructions`: installation.html
-
-* `lxml 0.9.2`_ (2006-05-10)
-
-* `lxml 0.9.1`_ (2006-03-30)
-
-* `lxml 0.9`_ (2006-03-20)
-
-* `lxml 0.8`_ (2005-11-03)
-
-* `lxml 0.7`_ (2005-06-15)
-
-* `lxml 0.6`_ (2005-05-14)
-
-* `lxml 0.5.1`_ (2005-04-09)
-
-* `lxml 0.5`_ (2005-04-08)
-
-.. _`lxml 0.9.2`: lxml-0.9.2.tgz
-
-.. _`lxml 0.9.1`: lxml-0.9.1.tgz
-
-.. _`lxml 0.9`: lxml-0.9.tgz
-
-.. _`lxml 0.8`: lxml-0.8.tgz
-
-.. _`lxml 0.7`: lxml-0.7.tgz
-
-.. _`lxml 0.6`: lxml-0.6.tgz
-
-.. _`lxml 0.5.1`: lxml-0.5.1.tgz
-
-.. _`lxml 0.5`: lxml-0.5.tgz
-
-It's also possible to check out the latest development version of lxml
-from svn directly, using a command like this::
-
- svn co http://codespeak.net/svn/lxml/trunk lxml
-
-You can also `browse it through the web`_. The `latest CHANGES`_ of the
-developer version are also accessible. You can check there if a bug you found
-has been fixed or a feature you want has been implemented in the latest trunk
-version.
-
-.. _`browse it through the web`: http://codespeak.net/svn/lxml
-.. _`latest CHANGES`: http://codespeak.net/svn/lxml/trunk/CHANGES.txt
-
License
-------
From scoder at codespeak.net Fri May 12 23:12:37 2006
From: scoder at codespeak.net (scoder@codespeak.net)
Date: Fri May 12 23:12:40 2006
Subject: [Lxml-checkins] r27155 - lxml/branch/lxml-0.9.x/doc
Message-ID: <20060512211237.8137F100B8@code0.codespeak.net>
Author: scoder
Date: Fri May 12 23:12:36 2006
New Revision: 27155
Modified:
lxml/branch/lxml-0.9.x/doc/main.txt
Log:
removed blank lines between links in main.txt
Modified: lxml/branch/lxml-0.9.x/doc/main.txt
==============================================================================
--- lxml/branch/lxml-0.9.x/doc/main.txt (original)
+++ lxml/branch/lxml-0.9.x/doc/main.txt Fri May 12 23:12:36 2006
@@ -42,33 +42,20 @@
* `lxml 0.5`_, released 2005-04-08
.. _`lxml 0.9.2`: lxml-0.9.2.tgz
-
.. _`lxml 0.9.1`: lxml-0.9.1.tgz
-
.. _`lxml 0.9`: lxml-0.9.tgz
-
.. _`lxml 0.8`: lxml-0.8.tgz
-
.. _`lxml 0.7`: lxml-0.7.tgz
-
.. _`lxml 0.6`: lxml-0.6.tgz
-
.. _`lxml 0.5.1`: lxml-0.5.1.tgz
-
.. _`lxml 0.5`: lxml-0.5.tgz
.. _`CHANGES for 0.9.2`: changes-0.9.2.html
-
.. _`CHANGES for 0.9.1`: changes-0.9.1.html
-
.. _`CHANGES for 0.9`: changes-0.9.html
-
.. _`CHANGES for 0.8`: changes-0.8.html
-
.. _`CHANGES for 0.7`: changes-0.7.html
-
.. _`CHANGES for 0.6`: changes-0.6.html
-
.. _`CHANGES for 0.5.1`: changes-0.5.1.html
It's also possible to check out the latest development version of lxml
@@ -104,25 +91,15 @@
in the standar dlibrary.
.. _`ElementTree API`: http://effbot.org/zone/element-index.htm
-
.. _`ElementTree compatibility overview`: compatibility.html
-
.. _`extends this API`: api.html
-
.. _`extension functions`: extensions.html
-
.. _XPath: http://www.w3.org/TR/xpath
-
.. _`Relax NG`: http://www.relaxng.org/
-
.. _`XML Schema`: http://www.w3.org/XML/Schema
-
.. _`XSLT`: http://www.w3.org/TR/xslt
-
.. _`c14n`: http://www.w3.org/TR/2001/REC-xml-c14n-20010315
-
.. _`implementing namespaces`: namespace_extensions.html
-
.. _`SAX compliant API`: sax.html
Mailing list
From scoder at codespeak.net Fri May 12 23:13:50 2006
From: scoder at codespeak.net (scoder@codespeak.net)
Date: Fri May 12 23:13:52 2006
Subject: [Lxml-checkins] r27156 - lxml/trunk
Message-ID: <20060512211350.9E49C100B8@code0.codespeak.net>
Author: scoder
Date: Fri May 12 23:13:49 2006
New Revision: 27156
Modified:
lxml/trunk/bench.py
Log:
variable renamed
Modified: lxml/trunk/bench.py
==============================================================================
--- lxml/trunk/bench.py (original)
+++ lxml/trunk/bench.py Fri May 12 23:13:49 2006
@@ -582,8 +582,8 @@
name = 'bench_' + name
selected.append(name)
benchmarks = [ [ b for b in bs
- if [ contains for contains in selected
- if contains in b[0] ] ]
+ if [ match for match in selected
+ if match in b[0] ] ]
for bs in benchmarks ]
import time
From ogrisel at codespeak.net Mon May 15 11:27:17 2006
From: ogrisel at codespeak.net (ogrisel@codespeak.net)
Date: Mon May 15 11:27:18 2006
Subject: [Lxml-checkins] r27224 - lxml/www
Message-ID: <20060515092717.505FD10093@code0.codespeak.net>
Author: ogrisel
Date: Mon May 15 11:27:16 2006
New Revision: 27224
Modified:
lxml/www/style.css
Log:
new CSS style for the codespeak website (smaller fonts, centered text + various style improvements)
Modified: lxml/www/style.css
==============================================================================
--- lxml/www/style.css (original)
+++ lxml/www/style.css Mon May 15 11:27:16 2006
@@ -1,31 +1,60 @@
body {
+ /* CSS Hack for IE that does not respect the "margin: auto" rule at the
+ * document level */
+ text-align: center;
+ padding: 1em;
+}
+
+div.document {
+ width: 45em;
+ font: 13px Arial, Verdana, Helvetica, sans-serif;
+ margin: 1em auto 1em auto;
+ background-color: white;
+ color: #222;
+ text-align: left;
+}
+
+h1.title {
background: url(http://codespeak.net/img/codespeak1b.png) no-repeat;
- font: 120% Arial, Verdana, Helvetica, sans-serif;
- border: 0;
- margin: 0.5em 0em 0.5em 0.5em;
- padding: 0 0 0 145px;
+ padding: 20px 0 0 180px;
+ height: 60px;
+ font-size: 200%;
}
-a {
- text-decoration: underline;
- background-color: transparent;
+h1, h2, h3 {
+ color: #333;
+ font-weight: bold;
}
-p {
- /*margin: 0.5em 0em 1em 0em;*/
- text-align: left;
- line-height: 1.5em;
- margin: 0.5em 0em 0em 0em;
+h1 {
+ font-size: 120%;
}
-p a {
- text-decoration: underline;
+h2 {
+ font-size: 110%;
}
+h3 {
+ font-size: 105%;
+}
-p a:active {
- color: Red;
+a, a:visited {
background-color: transparent;
+ font-weight: bold;
+ color: Black;
+ text-decoration: none;
+}
+
+a:active {
+ color: Red;
+ text-decoration: underline;
+}
+
+p {
+ /*margin: 0.5em 0em 1em 0em;*/
+ text-align: justify;
+ line-height: 1.5em;
+ margin: 0.5em 0em 0em 0em;
}
hr {
@@ -35,10 +64,8 @@
background-color: transparent;
}
-
-ul {
+ul {
line-height: 1.5em;
- /*list-style-image: url("bullet.gif"); */
margin-left: 1em;
}
@@ -47,28 +74,21 @@
margin-left: 0em;
}
-ul a, ol a {
- text-decoration: underline;
-}
-
blockquote {
font-family: Times, "Times New Roman", serif;
font-style: italic;
- font-size: 120%;
}
code {
- font-size: 120%;
color: Black;
- /*background-color: #dee7ec;*/
background-color: #cccccc;
+ font-family: "Courier New", Courier, monospace;
}
pre {
- font-size: 120%;
- padding: 1em;
+ padding: 0.5em;
border: 1px solid #8cacbb;
color: Black;
- background-color: #dee7ec;
background-color: #cccccc;
+ font-family: "Courier New", Courier, monospace;
}
From ogrisel at codespeak.net Mon May 15 11:28:00 2006
From: ogrisel at codespeak.net (ogrisel@codespeak.net)
Date: Mon May 15 11:28:01 2006
Subject: [Lxml-checkins] r27225 - lxml/www
Message-ID: <20060515092800.80F8210093@code0.codespeak.net>
Author: ogrisel
Date: Mon May 15 11:27:59 2006
New Revision: 27225
Modified:
lxml/www/publish.py
Log:
changed the publish.py script to remove the hardcoded reference to the style.css URL
Modified: lxml/www/publish.py
==============================================================================
--- lxml/www/publish.py (original)
+++ lxml/www/publish.py Mon May 15 11:27:59 2006
@@ -1,9 +1,12 @@
-import os, sys
+import os, shutil, sys
def publish(dirname, lxml_path, release):
if not os.path.exists(dirname):
os.mkdir(dirname)
- stylesheet_url = 'http://codespeak.net/lxml/style.css'
+ stylesheet_url = 'style.css'
+
+ shutil.copy(stylesheet_url, dirname)
+
for name in ['main.txt', 'intro.txt', 'api.txt', 'compatibility.txt',
'extensions.txt', 'namespace_extensions.txt', 'sax.txt']:
path = os.path.join(lxml_path, 'doc', name)
@@ -22,10 +25,10 @@
os.path.join(dirname, 'index.html'))
def rest2html(source_path, dest_path, stylesheet_url):
-
- command = ('rest2html --stylesheet=%s %s > %s' %
+
+ command = ('rest2html --stylesheet=%s --link-stylesheet %s > %s' %
(stylesheet_url, source_path, dest_path))
os.system(command)
-
+
if __name__ == '__main__':
publish(sys.argv[1], sys.argv[2], sys.argv[3])
From scoder at codespeak.net Tue May 16 19:42:09 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Tue, 16 May 2006 19:42:09 +0200 (CEST)
Subject: [Lxml-checkins] r27294 - in lxml/trunk: . src/lxml
Message-ID: <20060516174209.6F8A110077@code0.codespeak.net>
Author: scoder
Date: Tue May 16 19:42:03 2006
New Revision: 27294
Modified:
lxml/trunk/CHANGES.txt
lxml/trunk/src/lxml/apihelpers.pxi
lxml/trunk/src/lxml/etree.pyx
lxml/trunk/src/lxml/parser.pxi
lxml/trunk/src/lxml/tree.pxd
Log:
rewrite of ElementTree.write() to write directly to file/file-like instead of serializing to memory
Modified: lxml/trunk/CHANGES.txt
==============================================================================
--- lxml/trunk/CHANGES.txt (original)
+++ lxml/trunk/CHANGES.txt Tue May 16 19:42:03 2006
@@ -7,6 +7,9 @@
Features added
--------------
+* ElementTree.write() no longer serializes in memory (reduced memory
+ footprint)
+
* Speedup of Element.findall(tag) and Element.getiterator(tag)
* Support for writing the XML representation of Elements and ElementTrees to
Modified: lxml/trunk/src/lxml/apihelpers.pxi
==============================================================================
--- lxml/trunk/src/lxml/apihelpers.pxi (original)
+++ lxml/trunk/src/lxml/apihelpers.pxi Tue May 16 19:42:03 2006
@@ -1,41 +1,31 @@
-# Private helper functions
+# Private helper functions for input/output and API functions
+
+# XML I/O helpers
-cdef _tostring(_NodeBase element, encoding, int xml_declaration):
+cdef _tostring(_NodeBase element, encoding, int write_xml_declaration):
"Serialize an element to an encoded string representation of its XML tree."
cdef _Document doc
cdef tree.xmlOutputBuffer* c_buffer
cdef tree.xmlBuffer* c_result_buffer
cdef tree.xmlCharEncodingHandler* enchandler
- cdef char* enc
+ cdef char* c_enc
+ cdef char* c_version
if element is None:
return None
if encoding in ('utf8', 'UTF8', 'utf-8'):
encoding = 'UTF-8'
doc = element._doc
- enc = encoding
+ c_enc = encoding
# it is necessary to *and* find the encoding handler *and* use
# encoding during output
- enchandler = tree.xmlFindCharEncodingHandler(enc)
+ enchandler = tree.xmlFindCharEncodingHandler(c_enc)
c_buffer = tree.xmlAllocOutputBuffer(enchandler)
if c_buffer is NULL:
raise LxmlError, "Failed to create output buffer"
- if xml_declaration:
- if doc._c_doc.version is NULL:
- version = "1.0"
- else:
- version = doc._c_doc.version
- xml_decl = "" % (
- version, encoding)
- tree.xmlOutputBufferWriteString(c_buffer, "\n")
-
try:
- tree.xmlNodeDumpOutput(c_buffer, doc._c_doc, element._c_node, 0, 0, enc)
- _dumpNextNode(c_buffer, doc._c_doc, element._c_node, enc)
+ _writeNodeToBuffer(c_buffer, doc._c_doc, element._c_node,
+ doc._c_doc.version, c_enc, write_xml_declaration)
tree.xmlOutputBufferFlush(c_buffer)
if c_buffer.conv is not NULL:
c_result_buffer = c_buffer.conv
@@ -60,8 +50,8 @@
if c_buffer is NULL:
raise LxmlError, "Failed to create output buffer"
try:
- tree.xmlNodeDumpOutput(c_buffer, doc._c_doc, element._c_node, 0, 0, NULL)
- _dumpNextNode(c_buffer, doc._c_doc, element._c_node, NULL)
+ _writeNodeToBuffer(c_buffer, doc._c_doc, element._c_node,
+ NULL, NULL, 0)
tree.xmlOutputBufferFlush(c_buffer)
if c_buffer.conv is not NULL:
c_result_buffer = c_buffer.conv
@@ -75,6 +65,101 @@
tree.xmlOutputBufferClose(c_buffer)
return result
+cdef void _writeNodeToBuffer(tree.xmlOutputBuffer* c_buffer,
+ xmlDoc* c_doc, xmlNode* c_node,
+ char* xml_version, char* encoding,
+ int write_xml_declaration):
+ if write_xml_declaration:
+ _writeDeclarationToBuffer(c_buffer, xml_version, encoding)
+
+ tree.xmlNodeDumpOutput(c_buffer, c_doc, c_node, 0, 0, encoding)
+ _dumpNextNode(c_buffer, c_doc, c_node, encoding)
+
+cdef void _writeDeclarationToBuffer(tree.xmlOutputBuffer* c_buffer,
+ char* version, char* encoding):
+ if version is NULL:
+ version = "1.0"
+ tree.xmlOutputBufferWriteString(c_buffer, "\n")
+
+# output to file-like objects
+cdef class _FileWriter:
+ cdef object _filelike
+ cdef _ExceptionContext _exc_context
+ def __init__(self, filelike, exc_context=None):
+ self._filelike = filelike
+ if exc_context is None:
+ self._exc_context = _ExceptionContext()
+ else:
+ self._exc_context = exc_context
+
+ cdef tree.xmlOutputBuffer* _createOutputBuffer(
+ self, tree.xmlCharEncodingHandler* enchandler) except NULL:
+ cdef tree.xmlOutputBuffer* c_buffer
+ c_buffer = tree.xmlOutputBufferCreateIO(
+ _writeFilelikeWriter, _closeFilelikeWriter,
+ self, enchandler)
+ if c_buffer is NULL:
+ raise IOError, "Could not create I/O writer context."
+ return c_buffer
+
+ cdef int write(self, char* c_buffer, int len):
+ try:
+ if self._filelike is None:
+ raise IOError, "File is already closed"
+ py_buffer = python.PyString_FromStringAndSize(c_buffer, len)
+ self._filelike.write(py_buffer)
+ return len
+ except Exception:
+ self._exc_context._store_raised()
+ return -1
+
+ cdef int close(self):
+ # we should not close the file here as we didn't open it
+ self._filelike = None
+ return 0
+
+cdef int _writeFilelikeWriter(void* ctxt, char* c_buffer, int len):
+ return (<_FileWriter>ctxt).write(c_buffer, len)
+
+cdef int _closeFilelikeWriter(void* ctxt):
+ return (<_FileWriter>ctxt).close()
+
+cdef _tofile(f, _NodeBase element, encoding, int write_declaration):
+ cdef _FileWriter writer
+ cdef tree.xmlOutputBuffer* c_buffer
+ cdef tree.xmlCharEncodingHandler* enchandler
+ cdef char* c_enc
+ if encoding is None:
+ c_enc = NULL
+ else:
+ c_enc = encoding
+
+ enchandler = tree.xmlFindCharEncodingHandler(c_enc)
+ if python.PyString_Check(f) or python.PyUnicode_Check(f):
+ filename = _utf8(f)
+ c_buffer = tree.xmlOutputBufferCreateFilename(
+ _cstr(filename), enchandler, 0)
+ elif hasattr(f, 'write'):
+ writer = _FileWriter(f)
+ c_buffer = writer._createOutputBuffer(enchandler)
+ else:
+ raise TypeError, "File or filename expected, got '%s'" % type(f)
+
+ _writeNodeToBuffer(c_buffer,
+ element._doc._c_doc, element._c_node,
+ element._doc._c_doc.version, c_enc,
+ write_declaration)
+
+ tree.xmlOutputBufferClose(c_buffer)
+ if writer is not None:
+ writer._exc_context._raise_if_stored()
+
+# Private helper functions
+
cdef void displayNode(xmlNode* c_node, indent):
# to help with debugging
cdef xmlNode* c_child
@@ -189,8 +274,8 @@
tree.xmlOutputBufferWriteString(c_buffer, '\n')
tree.xmlOutputBufferFlush(c_buffer)
-cdef _dumpNextNode(tree.xmlOutputBuffer* c_buffer, xmlDoc* c_doc,
- xmlNode* c_node, char* encoding):
+cdef void _dumpNextNode(tree.xmlOutputBuffer* c_buffer, xmlDoc* c_doc,
+ xmlNode* c_node, char* encoding):
cdef xmlNode* c_next
c_next = c_node.next
if c_next is not NULL and c_next.type == tree.XML_TEXT_NODE:
@@ -525,4 +610,3 @@
while c_attr_current is not NULL:
changeDocumentBelowHelper(c_current, doc)
c_attr_current = c_attr_current.next
-
Modified: lxml/trunk/src/lxml/etree.pyx
==============================================================================
--- lxml/trunk/src/lxml/etree.pyx (original)
+++ lxml/trunk/src/lxml/etree.pyx Tue May 16 19:42:03 2006
@@ -347,17 +347,14 @@
return DocInfo(self._doc)
def write(self, file, encoding='us-ascii'):
- if not hasattr(file, 'write'):
- # file is a filename, we want a file object
- file = open(file, 'wb')
-
- m = tostring(self._context_node, encoding)
- # XXX this is purely for ElementTree compatibility..
+ if encoding in ('utf8', 'UTF8', 'utf-8'):
+ encoding = 'UTF-8'
if encoding == 'UTF-8' or encoding == 'us-ascii':
- m = _stripDeclaration(m)
- if m[-1:] == '\n':
- m = m[:-1]
- file.write(m)
+ # XXX this is purely for ElementTree compatibility..
+ write_declaration = 0
+ else:
+ write_declaration = 1
+ _tofile(file, self._context_node, encoding, write_declaration)
def getiterator(self, tag=None):
root = self.getroot()
Modified: lxml/trunk/src/lxml/parser.pxi
==============================================================================
--- lxml/trunk/src/lxml/parser.pxi (original)
+++ lxml/trunk/src/lxml/parser.pxi Tue May 16 19:42:03 2006
@@ -129,21 +129,21 @@
cdef xmlparser.xmlParserInputBuffer* c_buffer
c_buffer = xmlparser.xmlAllocParserInputBuffer(0)
c_buffer.context = self
- c_buffer.readcallback = _copyFilelike
+ c_buffer.readcallback = _readFilelikeParser
return xmlparser.xmlNewIOInputStream(ctxt, c_buffer, 0)
cdef xmlDoc* _readDoc(self, xmlParserCtxt* ctxt, int options,
LxmlParserType parser_type):
if parser_type == LXML_XML_PARSER:
return xmlparser.xmlCtxtReadIO(
- ctxt, _copyFilelike, NULL, self,
+ ctxt, _readFilelikeParser, NULL, self,
self._c_url, NULL, options)
else:
return htmlparser.htmlCtxtReadIO(
- ctxt, _copyFilelike, NULL, self,
+ ctxt, _readFilelikeParser, NULL, self,
self._c_url, NULL, options)
- cdef int write(self, char* c_buffer, int c_size):
+ cdef int copyToBuffer(self, char* c_buffer, int c_size):
cdef char* c_start
cdef Py_ssize_t byte_count, remaining
if self._bytes_read < 0:
@@ -168,9 +168,8 @@
self._exc_context._store_raised()
return -1
-cdef int _copyFilelike(void* ctxt, char* c_buffer, int c_size):
- return (<_FileParserContext>ctxt).write(c_buffer, c_size)
-
+cdef int _readFilelikeParser(void* ctxt, char* c_buffer, int c_size):
+ return (<_FileParserContext>ctxt).copyToBuffer(c_buffer, c_size)
############################################################
## support for custom document loaders
Modified: lxml/trunk/src/lxml/tree.pxd
==============================================================================
--- lxml/trunk/src/lxml/tree.pxd (original)
+++ lxml/trunk/src/lxml/tree.pxd Tue May 16 19:42:03 2006
@@ -204,17 +204,26 @@
cdef int xmlBufferLength(xmlBuffer* buf)
cdef extern from "libxml/xmlIO.h":
- cdef xmlOutputBuffer* xmlAllocOutputBuffer(xmlCharEncodingHandler* encoder)
- cdef xmlOutputBuffer* xmlOutputBufferCreateFile(
- FILE* file,
- xmlCharEncodingHandler* encoder)
cdef int xmlOutputBufferWriteString(xmlOutputBuffer* out, char* str)
cdef int xmlOutputBufferFlush(xmlOutputBuffer* out)
cdef int xmlOutputBufferClose(xmlOutputBuffer* out)
ctypedef int (*xmlInputReadCallback)(void* context, char* buffer, int len)
- ctypedef int (*xmlInputCloseCallback)(void * context)
+ ctypedef int (*xmlInputCloseCallback)(void* context)
+
+ ctypedef int (*xmlOutputWriteCallback)(void* context, char* buffer, int len)
+ ctypedef int (*xmlOutputCloseCallback)(void* context)
+ cdef xmlOutputBuffer* xmlAllocOutputBuffer(xmlCharEncodingHandler* encoder)
+ cdef xmlOutputBuffer* xmlOutputBufferCreateIO(
+ xmlOutputWriteCallback iowrite,
+ xmlOutputCloseCallback ioclose,
+ void * ioctx,
+ xmlCharEncodingHandler* encoder)
+ cdef xmlOutputBuffer* xmlOutputBufferCreateFile(
+ FILE* file, xmlCharEncodingHandler* encoder)
+ cdef xmlOutputBuffer* xmlOutputBufferCreateFilename(
+ char* URI, xmlCharEncodingHandler* encoder, int compression)
cdef extern from "libxml/xmlsave.h":
ctypedef struct xmlSaveCtxt:
pass
From ogrisel at codespeak.net Tue May 16 23:01:22 2006
From: ogrisel at codespeak.net (ogrisel at codespeak.net)
Date: Tue, 16 May 2006 23:01:22 +0200 (CEST)
Subject: [Lxml-checkins] r27305 - lxml/www
Message-ID: <20060516210122.1442C10094@code0.codespeak.net>
Author: ogrisel
Date: Tue May 16 23:01:21 2006
New Revision: 27305
Modified:
lxml/www/style.css
Log:
style improvement to better distinguish headlines from regular links
Modified: lxml/www/style.css
==============================================================================
--- lxml/www/style.css (original)
+++ lxml/www/style.css Tue May 16 23:01:21 2006
@@ -21,21 +21,22 @@
font-size: 200%;
}
-h1, h2, h3 {
- color: #333;
+h1.title, h1 a, h2 a, h3 a {
+ color: #666;
font-weight: bold;
+ font-family: Helvetica, sans-serif;
}
h1 {
- font-size: 120%;
+ font-size: 150%;
}
h2 {
- font-size: 110%;
+ font-size: 130%;
}
h3 {
- font-size: 105%;
+ font-size: 110%;
}
a, a:visited {
@@ -47,6 +48,9 @@
a:active {
color: Red;
+}
+
+a:hover {
text-decoration: underline;
}
From ogrisel at codespeak.net Tue May 16 23:26:05 2006
From: ogrisel at codespeak.net (ogrisel at codespeak.net)
Date: Tue, 16 May 2006 23:26:05 +0200 (CEST)
Subject: [Lxml-checkins] r27308 - lxml/www
Message-ID: <20060516212605.BE515100A2@code0.codespeak.net>
Author: ogrisel
Date: Tue May 16 23:26:05 2006
New Revision: 27308
Modified:
lxml/www/style.css
Log:
reST headlines are links to nowhere by default, thus hide their link style by specializing the "a" rules
Modified: lxml/www/style.css
==============================================================================
--- lxml/www/style.css (original)
+++ lxml/www/style.css Tue May 16 23:26:05 2006
@@ -46,11 +46,11 @@
text-decoration: none;
}
-a:active {
+p a:active, ul a:active {
color: Red;
}
-a:hover {
+p a:hover, ul a:hover {
text-decoration: underline;
}
From scoder at codespeak.net Tue May 16 23:28:28 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Tue, 16 May 2006 23:28:28 +0200 (CEST)
Subject: [Lxml-checkins] r27309 - lxml/trunk/doc
Message-ID: <20060516212828.52154100A2@code0.codespeak.net>
Author: scoder
Date: Tue May 16 23:28:27 2006
New Revision: 27309
Modified:
lxml/trunk/doc/extensions.txt
Log:
tiny cleanup in docs
Modified: lxml/trunk/doc/extensions.txt
==============================================================================
--- lxml/trunk/doc/extensions.txt (original)
+++ lxml/trunk/doc/extensions.txt Tue May 16 23:28:27 2006
@@ -101,7 +101,8 @@
>>> print e.evaluate('es:hello(local-name(/a))')
Ola a
- >>> e = etree.XPathEvaluator(doc, namespaces={'f' : 'http://mydomain.org/myfunctions'})
+ >>> namespaces = {'f' : 'http://mydomain.org/myfunctions'}
+ >>> e = etree.XPathEvaluator(doc, namespaces=namespaces)
>>> print e.evaluate('f:hello(local-name(/a))')
Hello a
From scoder at codespeak.net Wed May 17 00:10:13 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Wed, 17 May 2006 00:10:13 +0200 (CEST)
Subject: [Lxml-checkins] r27310 - lxml/branch/xslt-access-control
Message-ID: <20060516221013.107841009D@code0.codespeak.net>
Author: scoder
Date: Wed May 17 00:10:11 2006
New Revision: 27310
Added:
lxml/branch/xslt-access-control/
- copied from r27309, lxml/trunk/
Log:
new branch for implementing file/network access control in XSLT
From scoder at codespeak.net Wed May 17 00:14:29 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Wed, 17 May 2006 00:14:29 +0200 (CEST)
Subject: [Lxml-checkins] r27311 - in lxml/branch/xslt-access-control: doc
src/lxml
Message-ID: <20060516221429.131731009D@code0.codespeak.net>
Author: scoder
Date: Wed May 17 00:14:20 2006
New Revision: 27311
Modified:
lxml/branch/xslt-access-control/doc/resolvers.txt
lxml/branch/xslt-access-control/src/lxml/xslt.pxd
lxml/branch/xslt-access-control/src/lxml/xslt.pxi
Log:
first shot on XSLT access control using XSLTAccessControl wrapper class for libxslt:security API
Modified: lxml/branch/xslt-access-control/doc/resolvers.txt
==============================================================================
--- lxml/branch/xslt-access-control/doc/resolvers.txt (original)
+++ lxml/branch/xslt-access-control/doc/resolvers.txt Wed May 17 00:14:20 2006
@@ -186,3 +186,41 @@
inherits. For XSLT, the output document inherits the resolvers of the input
document and not those of the stylesheet. Therefore, the last result does not
inherit any resolvers at all.
+
+
+I/O access control in XSLT
+--------------------------
+
+XSLT has an additional mechanism to control the access to certain I/O operations
+during the transformation process. This is most interesting where XSL scripts
+come from potentially insecure sources and must be prevented from modifying
+the local file system. Note, however, that there is no way to keep them from
+eating up your precious CPU time, so this should not stop you from thinking
+about what XSLT you execute.
+
+Access control is configured using the XSLTAccessControl class. It can be
+called with a number of keyword arguments that allow or deny specific
+operations::
+
+ >>> transform = etree.XSLT(honk_doc)
+ Resolving url honk:test as prefix honk ... done
+ >>> result = transform(normal_doc)
+ Resolving url hoi:test as prefix honk ... failed
+ Resolving url hoi:test as prefix hoi ... done
+
+ >>> ac = etree.XSLTAccessControl(read_network=False)
+ >>> transform = etree.XSLT(honk_doc, access_control=ac)
+ Resolving url honk:test as prefix honk ... done
+ >>> result = transform(normal_doc)
+ Traceback (most recent call last):
+ [...]
+ XSLTApplyError: runtime error (element 'value-of')
+
+There are a few things to keep in mind:
+
+* ``read_file=False`` does not imply ``write_file=False``, all controls are
+ independent.
+* XSL parsing (``xsl:import``, etc.) is not affected by this mechanism
+* ``read_file`` only applies to files in the file system. Any custom schemes
+ for URLs or URIs are controlled via the ``*_network`` keywords.
+
Modified: lxml/branch/xslt-access-control/src/lxml/xslt.pxd
==============================================================================
--- lxml/branch/xslt-access-control/src/lxml/xslt.pxd (original)
+++ lxml/branch/xslt-access-control/src/lxml/xslt.pxd Wed May 17 00:14:20 2006
@@ -19,6 +19,7 @@
xsltStylesheet* style
xmlXPathContext* xpathCtxt
xsltDocument* document
+ void* _private
cdef xsltStylesheet* xsltParseStylesheetDoc(xmlDoc* doc)
cdef void xsltFreeStylesheet(xsltStylesheet* sheet)
@@ -69,6 +70,33 @@
void* ctxt,
void (*handler)(void* ctxt, char* msg, ...))
+cdef extern from "libxslt/security.h":
+ ctypedef struct xsltSecurityPrefs
+ ctypedef enum xsltSecurityOption:
+ XSLT_SECPREF_READ_FILE = 1
+ XSLT_SECPREF_WRITE_FILE = 2
+ XSLT_SECPREF_CREATE_DIRECTORY = 3
+ XSLT_SECPREF_READ_NETWORK = 4
+ XSLT_SECPREF_WRITE_NETWORK = 5
+
+ ctypedef int (*xsltSecurityCheck)(xsltSecurityPrefs* sec,
+ xsltTransformContext* ctxt,
+ char* value)
+
+ cdef xsltSecurityPrefs* xsltNewSecurityPrefs()
+ cdef void xsltFreeSecurityPrefs(xsltSecurityPrefs* sec)
+ cdef int xsltSecurityForbid(xsltSecurityPrefs* sec,
+ xsltTransformContext* ctxt,
+ char* value)
+ cdef int xsltSecurityAllow(xsltSecurityPrefs* sec,
+ xsltTransformContext* ctxt,
+ char* value)
+ cdef int xsltSetSecurityPrefs(xsltSecurityPrefs* sec,
+ xsltSecurityOption option,
+ xsltSecurityCheck func)
+ cdef int xsltSetCtxtSecurityPrefs(xsltSecurityPrefs* sec,
+ xsltTransformContext* ctxt)
+
cdef extern from "libxslt/extra.h":
cdef char* XSLT_LIBXSLT_NAMESPACE
cdef char* XSLT_XALAN_NAMESPACE
Modified: lxml/branch/xslt-access-control/src/lxml/xslt.pxi
==============================================================================
--- lxml/branch/xslt-access-control/src/lxml/xslt.pxi (original)
+++ lxml/branch/xslt-access-control/src/lxml/xslt.pxi Wed May 17 00:14:20 2006
@@ -17,6 +17,9 @@
class XSLTExtensionError(XSLTError):
pass
+class XSLTAccessDeniedError(XSLTError):
+ pass
+
# version information
LIBXSLT_COMPILED_VERSION = __unpackIntVersion(xslt.LIBXSLT_VERSION)
LIBXSLT_VERSION = __unpackIntVersion(xslt.xsltLibxsltVersion)
@@ -122,6 +125,134 @@
xslt.xsltSetLoaderFunc(_doc_loader)
+################################################################################
+# XSLT file/network access control
+
+cdef object __ACCESS_METHOD_MAP
+__ACCESS_METHOD_MAP = {
+ xslt.XSLT_SECPREF_READ_FILE : "check_read_file",
+ xslt.XSLT_SECPREF_WRITE_FILE : "check_write_file",
+ xslt.XSLT_SECPREF_CREATE_DIRECTORY : "check_create_dir",
+ xslt.XSLT_SECPREF_READ_NETWORK : "check_read_network",
+ xslt.XSLT_SECPREF_WRITE_NETWORK : "check_write_network",
+ }
+
+cdef class XSLTAccessControl:
+ """Access control for XSLT: reading/writing files, directories and network
+ access.
+
+ Access is granted to a type of resource via keyword arguments or for
+ specific URLs by subclassing and implementing filter methods 'check_*'
+ that return a truth value for their URL string argument:
+
+ * read_file
+ * write_file
+ * create_dir
+ * read_network
+ * write_network
+ """
+ cdef xslt.xsltSecurityPrefs* _prefs
+ def __init__(self, read_file=None, write_file=None, create_dir=None,
+ read_network=None, write_network=None):
+ self._prefs = xslt.xsltNewSecurityPrefs()
+ if self._prefs is NULL:
+ raise XSLTError, "Error preparing access control context"
+ self._setAccess(xslt.XSLT_SECPREF_READ_FILE, read_file)
+ self._setAccess(xslt.XSLT_SECPREF_WRITE_FILE, write_file)
+ self._setAccess(xslt.XSLT_SECPREF_CREATE_DIRECTORY, create_dir)
+ self._setAccess(xslt.XSLT_SECPREF_READ_NETWORK, read_network)
+ self._setAccess(xslt.XSLT_SECPREF_WRITE_NETWORK, write_network)
+
+ def __dealloc__(self):
+ if self._prefs is not NULL:
+ xslt.xsltFreeSecurityPrefs(self._prefs)
+
+ cdef _setAccess(self, xslt.xsltSecurityOption option, allow):
+ cdef xslt.xsltSecurityCheck function
+ if allow is None:
+ # check if the corresponding method is defined
+ method_name = __ACCESS_METHOD_MAP.get(option, None)
+ if method_name is None:
+ function = xslt.xsltSecurityAllow
+ elif hasattr(self, method_name):
+ if option == xslt.XSLT_SECPREF_READ_FILE:
+ function = _checkFileRead
+ elif option == xslt.XSLT_SECPREF_WRITE_FILE:
+ function = _checkFileWrite
+ elif option == xslt.XSLT_SECPREF_CREATE_DIRECTORY:
+ function = _checkDirCreate
+ elif option == xslt.XSLT_SECPREF_READ_NETWORK:
+ function = _checkNetworkRead
+ elif option == xslt.XSLT_SECPREF_WRITE_NETWORK:
+ function = _checkNetworkWrite
+ else:
+ function = xslt.xsltSecurityAllow
+ else:
+ function = xslt.xsltSecurityAllow
+ elif allow:
+ function = xslt.xsltSecurityAllow
+ else:
+ function = xslt.xsltSecurityForbid
+ xslt.xsltSetSecurityPrefs(self._prefs, option, function)
+
+ cdef void _register_in_context(self, xslt.xsltTransformContext* ctxt):
+ ctxt._private = self
+ xslt.xsltSetCtxtSecurityPrefs(self._prefs, ctxt)
+
+cdef int _checkFileRead(xslt.xsltSecurityPrefs* sec,
+ xslt.xsltTransformContext* ctxt, char* value):
+ cdef XSLTAccessControl access_control
+ if ctxt is NULL or ctxt._private is NULL:
+ return 1 # no access control => allow everything
+ access_control = ctxt._private
+ try:
+ return bool( access_control.check_read_file(value) )
+ except Exception:
+ return 0
+
+cdef int _checkFileWrite(xslt.xsltSecurityPrefs* sec,
+ xslt.xsltTransformContext* ctxt, char* value):
+ cdef XSLTAccessControl access_control
+ if ctxt is NULL or ctxt._private is NULL:
+ return 1 # no access control => allow everything
+ access_control = ctxt._private
+ try:
+ return bool( access_control.check_write_file(value) )
+ except Exception:
+ return 0
+
+cdef int _checkDirCreate(xslt.xsltSecurityPrefs* sec,
+ xslt.xsltTransformContext* ctxt, char* value):
+ cdef XSLTAccessControl access_control
+ if ctxt is NULL or ctxt._private is NULL:
+ return 1 # no access control => allow everything
+ access_control = ctxt._private
+ try:
+ return bool( access_control.check_create_dir(value) )
+ except Exception:
+ return 0
+
+cdef int _checkNetworkRead(xslt.xsltSecurityPrefs* sec,
+ xslt.xsltTransformContext* ctxt, char* value):
+ cdef XSLTAccessControl access_control
+ if ctxt is NULL or ctxt._private is NULL:
+ return 1 # no access control => allow everything
+ access_control = ctxt._private
+ try:
+ return bool( access_control.check_read_network(value) )
+ except Exception:
+ return 0
+
+cdef int _checkNetworkWrite(xslt.xsltSecurityPrefs* sec,
+ xslt.xsltTransformContext* ctxt, char* value):
+ cdef XSLTAccessControl access_control
+ if ctxt is NULL or ctxt._private is NULL:
+ return 1 # no access control => allow everything
+ access_control = ctxt._private
+ try:
+ return bool( access_control.check_write_network(value) )
+ except Exception:
+ return 0
################################################################################
# XSLT
@@ -157,7 +288,7 @@
self._extensions = {}
python.PyDict_SetItem(self._extensions, (ns_utf, name_utf), function)
-cdef class _ExsltRegExp # forward declaration
+cdef class _ExsltRegExp # forward declarations
cdef class XSLT:
"""Turn a document into an XSLT object.
@@ -165,10 +296,11 @@
cdef _XSLTContext _context
cdef xslt.xsltStylesheet* _c_style
cdef _XSLTResolverContext _xslt_resolver_context
+ cdef XSLTAccessControl _access_control
cdef _ExsltRegExp _regexp
cdef _ErrorLog _error_log
- def __init__(self, xslt_input, extensions=None, regexp=True):
+ def __init__(self, xslt_input, extensions=None, regexp=True, access_control=None):
cdef xslt.xsltStylesheet* c_style
cdef xmlDoc* c_doc
cdef xmlDoc* fake_c_doc
@@ -178,6 +310,9 @@
doc = _documentOrRaise(xslt_input)
root_node = _rootNodeOf(xslt_input)
+ # set access control or raise TypeError
+ self._access_control = access_control
+
# make a copy of the document as stylesheet parsing modifies it
fake_c_doc = _fakeRootDoc(doc._c_doc, root_node._c_node)
c_doc = tree.xmlCopyDoc(fake_c_doc, 1)
@@ -253,6 +388,9 @@
xslt.xsltSetTransformErrorFunc(transform_ctxt, self._error_log,
_receiveGenericError)
+ if self._access_control is not None:
+ self._access_control._register_in_context(transform_ctxt)
+
ptemp = c_doc._private
c_doc._private = resolver_context
@@ -300,7 +438,13 @@
self._xslt_resolver_context._raise_if_stored()
if c_result is NULL:
- raise XSLTApplyError, "Error applying stylesheet"
+ message = "Error applying stylesheet"
+ errors = self._error_log.filter_from_errors()
+ if errors:
+ error = errors[-1]
+ if error.message:
+ message = error.message
+ raise XSLTApplyError, message
result_doc = _documentFactory(c_result, input_doc._parser)
return _xsltResultTreeFactory(result_doc, self)
@@ -369,6 +513,7 @@
# enable EXSLT support for XSLT
xslt.exsltRegisterAll()
+# extension function lookup for XSLT
cdef xpath.xmlXPathFunction _xslt_function_check(void* ctxt,
char* c_name, char* c_ns_uri):
"Find XSLT extension function from set of XPath and XSLT functions"
From scoder at codespeak.net Wed May 17 01:25:17 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Wed, 17 May 2006 01:25:17 +0200 (CEST)
Subject: [Lxml-checkins] r27314 - lxml/branch/xslt-access-control/src/lxml
Message-ID: <20060516232517.4749310061@code0.codespeak.net>
Author: scoder
Date: Wed May 17 01:25:15 2006
New Revision: 27314
Modified:
lxml/branch/xslt-access-control/src/lxml/xslt.pxi
Log:
cleanup
Modified: lxml/branch/xslt-access-control/src/lxml/xslt.pxi
==============================================================================
--- lxml/branch/xslt-access-control/src/lxml/xslt.pxi (original)
+++ lxml/branch/xslt-access-control/src/lxml/xslt.pxi Wed May 17 01:25:15 2006
@@ -29,6 +29,18 @@
################################################################################
+# Where do we store what?
+#
+# xsltStylesheet->doc->_private
+# == _XSLTResolverContext for XSL stylesheet
+#
+# xsltTransformContext->document->doc->_private
+# == _XSLTResolverContext for transformed document
+#
+################################################################################
+
+
+################################################################################
# XSLT document loaders
cdef class _XSLTResolverContext(_ResolverContext):
@@ -199,60 +211,51 @@
ctxt._private = self
xslt.xsltSetCtxtSecurityPrefs(self._prefs, ctxt)
-cdef int _checkFileRead(xslt.xsltSecurityPrefs* sec,
- xslt.xsltTransformContext* ctxt, char* value):
+cdef int _checkAccess(xslt.xsltTransformContext* ctxt,
+ char* c_value, method_name):
+ cdef xmlDoc* c_doc
cdef XSLTAccessControl access_control
if ctxt is NULL or ctxt._private is NULL:
return 1 # no access control => allow everything
access_control = ctxt._private
try:
- return bool( access_control.check_read_file(value) )
+ if c_value is NULL:
+ value = None
+ else:
+ value = c_value
+ method = getattr(access_control, method_name, None)
+ if method is not None:
+ return bool( method(value) )
+ else:
+ return 1
except Exception:
+ # try to store exception in current resolver context
+ c_doc = ctxt.style.doc
+ if c_doc is not NULL and c_doc._private is not NULL:
+ if isinstance(c_doc._private, _XSLTResolverContext):
+ resolver_context = <_XSLTResolverContext>c_doc._private
+ resolver_context._store_raised()
return 0
+cdef int _checkFileRead(xslt.xsltSecurityPrefs* sec,
+ xslt.xsltTransformContext* ctxt, char* value):
+ return _checkAccess(ctxt, value, 'check_read_file')
+
cdef int _checkFileWrite(xslt.xsltSecurityPrefs* sec,
xslt.xsltTransformContext* ctxt, char* value):
- cdef XSLTAccessControl access_control
- if ctxt is NULL or ctxt._private is NULL:
- return 1 # no access control => allow everything
- access_control = ctxt._private
- try:
- return bool( access_control.check_write_file(value) )
- except Exception:
- return 0
+ return _checkAccess(ctxt, value, 'check_write_file')
cdef int _checkDirCreate(xslt.xsltSecurityPrefs* sec,
xslt.xsltTransformContext* ctxt, char* value):
- cdef XSLTAccessControl access_control
- if ctxt is NULL or ctxt._private is NULL:
- return 1 # no access control => allow everything
- access_control = ctxt._private
- try:
- return bool( access_control.check_create_dir(value) )
- except Exception:
- return 0
+ return _checkAccess(ctxt, value, 'check_create_dir')
cdef int _checkNetworkRead(xslt.xsltSecurityPrefs* sec,
xslt.xsltTransformContext* ctxt, char* value):
- cdef XSLTAccessControl access_control
- if ctxt is NULL or ctxt._private is NULL:
- return 1 # no access control => allow everything
- access_control = ctxt._private
- try:
- return bool( access_control.check_read_network(value) )
- except Exception:
- return 0
+ return _checkAccess(ctxt, value, 'check_read_network')
cdef int _checkNetworkWrite(xslt.xsltSecurityPrefs* sec,
xslt.xsltTransformContext* ctxt, char* value):
- cdef XSLTAccessControl access_control
- if ctxt is NULL or ctxt._private is NULL:
- return 1 # no access control => allow everything
- access_control = ctxt._private
- try:
- return bool( access_control.check_write_network(value) )
- except Exception:
- return 0
+ return _checkAccess(ctxt, value, 'check_write_network')
################################################################################
# XSLT
From scoder at codespeak.net Wed May 17 01:26:48 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Wed, 17 May 2006 01:26:48 +0200 (CEST)
Subject: [Lxml-checkins] r27315 - lxml/branch/xslt-access-control/doc
Message-ID: <20060516232648.3B43110061@code0.codespeak.net>
Author: scoder
Date: Wed May 17 01:26:46 2006
New Revision: 27315
Modified:
lxml/branch/xslt-access-control/doc/resolvers.txt
Log:
added broken doctest for broken fine-grained access extension (problem seems to be libxslt here, see libxslt bug #342045)
Modified: lxml/branch/xslt-access-control/doc/resolvers.txt
==============================================================================
--- lxml/branch/xslt-access-control/doc/resolvers.txt (original)
+++ lxml/branch/xslt-access-control/doc/resolvers.txt Wed May 17 01:26:46 2006
@@ -191,12 +191,12 @@
I/O access control in XSLT
--------------------------
-XSLT has an additional mechanism to control the access certain I/O operations
-during the transformation process. This is most interesting where XSL scripts
-come from potentially insecure sources and must be prevented from modifying
-the local file system. Note, however, that there is no way to keep them from
-eating up your precious CPU time, so this should not stop you from thinking
-about what XSLT you execute.
+XSLT has an additional mechanism to control the access to certain I/O
+operations during the transformation process. This is most interesting where
+XSL scripts come from potentially insecure sources and must be prevented from
+modifying the local file system. Note, however, that there is no way to keep
+them from eating up your precious CPU time, so this should not stop you from
+thinking about what XSLT you execute.
Access control is configured using the XSLTAccessControl class. It can be
called with a number of keyword arguments that allow or deny specific
@@ -218,9 +218,71 @@
There are a few things to keep in mind:
+* XSL parsing (``xsl:import``, etc.) is not affected by this mechanism
* ``read_file=False`` does not imply ``write_file=False``, all controls are
independent.
-* XSL parsing (``xsl:import``, etc.) is not affected by this mechanism
* ``read_file`` only applies to files in the file system. Any custom schemes
- for URLs or URIs are controlled via the ``*_network`` keywords.
+ for URLs (not URIs) are controlled via the ``*_network`` keywords.
+
+################################################################################
+# BROKEN FROM HERE
+################################################################################
+
+If switching access on and off is not fine-grained enough for your purpose, you
+can customize the XSLTAccessControl class by subclassing it and implementing
+any of the special methods ``check_read_file``, ``check_write_file``, etc.::
+
+ >>> class NetReadAccessControl(etree.XSLTAccessControl):
+ ... prefix = 'hoi:'
+ ... def check_read_network(self, uri):
+ ... if not uri:
+ ... return 1
+ ... return not uri.startswith(self.prefix)
+
+ >>> ac = NetReadAccessControl()
+
+
+ >>> xml_text = """\
+ ...
+ ...
+ ...
+ ...
+ ...
+ ...
+ ...
+ ...
+ ... """
+ >>> xslt = etree.XML(xml_text)
+
+ >>> transform = etree.XSLT(honk_doc, access_control=ac)
+ Resolving url honk:test as prefix honk ... done
+ >>> result = transform(normal_doc)
+ Resolving url hoi:test as prefix honk ... failed
+ Resolving url hoi:test as prefix hoi ... done
+
+# Traceback (most recent call last):
+# [...]
+# XSLTApplyError: runtime error (element 'value-of')
+
+ >>> ac.prefix = 'honk:'
+ >>> transform = etree.XSLT(honk_doc, access_control=ac)
+ Resolving url honk:test as prefix honk ... done
+ >>> result = transform(normal_doc)
+ Resolving url hoi:test as prefix honk ... failed
+ Resolving url hoi:test as prefix hoi ... done
+
+# Traceback (most recent call last):
+# [...]
+# XSLTApplyError: runtime error (element 'value-of')
+
+ >>> ac.prefix = 'IGNORE-ME:'
+ >>> transform = etree.XSLT(honk_doc, access_control=ac)
+ Resolving url honk:test as prefix honk ... done
+ >>> result = transform(normal_doc)
+ Resolving url hoi:test as prefix honk ... failed
+ Resolving url hoi:test as prefix hoi ... done
+# Traceback (most recent call last):
+# [...]
+# XSLTApplyError: runtime error (element 'value-of')
From scoder at codespeak.net Wed May 17 08:13:00 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Wed, 17 May 2006 08:13:00 +0200 (CEST)
Subject: [Lxml-checkins] r27317 - in lxml/branch/xslt-access-control: doc
src/lxml
Message-ID: <20060517061300.4E0F4100AB@code0.codespeak.net>
Author: scoder
Date: Wed May 17 08:12:57 2006
New Revision: 27317
Modified:
lxml/branch/xslt-access-control/doc/resolvers.txt
lxml/branch/xslt-access-control/src/lxml/xslt.pxi
Log:
removed broken support for more fine-grained access control (overlaps with document loaders anyway)
Modified: lxml/branch/xslt-access-control/doc/resolvers.txt
==============================================================================
--- lxml/branch/xslt-access-control/doc/resolvers.txt (original)
+++ lxml/branch/xslt-access-control/doc/resolvers.txt Wed May 17 08:12:57 2006
@@ -221,68 +221,8 @@
* XSL parsing (``xsl:import``, etc.) is not affected by this mechanism
* ``read_file=False`` does not imply ``write_file=False``, all controls are
independent.
-* ``read_file`` only applies to files in the file system. Any custom schemes
- for URLs (not URIs) are controlled via the ``*_network`` keywords.
-
-################################################################################
-# BROKEN FROM HERE
-################################################################################
-
-If switching access on and off is not fine-grained enough for your purpose, you
-can customize the XSLTAccessControl class by subclassing it and implementing
-any of the special methods ``check_read_file``, ``check_write_file``, etc.::
-
- >>> class NetReadAccessControl(etree.XSLTAccessControl):
- ... prefix = 'hoi:'
- ... def check_read_network(self, uri):
- ... if not uri:
- ... return 1
- ... return not uri.startswith(self.prefix)
-
- >>> ac = NetReadAccessControl()
-
-
- >>> xml_text = """\
- ...
- ...
- ...
- ...
- ...
- ...
- ...
- ...
- ... """
- >>> xslt = etree.XML(xml_text)
-
- >>> transform = etree.XSLT(honk_doc, access_control=ac)
- Resolving url honk:test as prefix honk ... done
- >>> result = transform(normal_doc)
- Resolving url hoi:test as prefix honk ... failed
- Resolving url hoi:test as prefix hoi ... done
-
-# Traceback (most recent call last):
-# [...]
-# XSLTApplyError: runtime error (element 'value-of')
-
- >>> ac.prefix = 'honk:'
- >>> transform = etree.XSLT(honk_doc, access_control=ac)
- Resolving url honk:test as prefix honk ... done
- >>> result = transform(normal_doc)
- Resolving url hoi:test as prefix honk ... failed
- Resolving url hoi:test as prefix hoi ... done
-
-# Traceback (most recent call last):
-# [...]
-# XSLTApplyError: runtime error (element 'value-of')
-
- >>> ac.prefix = 'IGNORE-ME:'
- >>> transform = etree.XSLT(honk_doc, access_control=ac)
- Resolving url honk:test as prefix honk ... done
- >>> result = transform(normal_doc)
- Resolving url hoi:test as prefix honk ... failed
- Resolving url hoi:test as prefix hoi ... done
-
-# Traceback (most recent call last):
-# [...]
-# XSLTApplyError: runtime error (element 'value-of')
+* ``read_file`` only applies to files in the file system. Any other scheme
+ for URLs is controlled by the ``*_network`` keywords.
+* If you need more fine-grained control than switching access on and off, you
+ should consider writing a custom document loader that returns empty
+ documents or raises exceptions if access is denied.
Modified: lxml/branch/xslt-access-control/src/lxml/xslt.pxi
==============================================================================
--- lxml/branch/xslt-access-control/src/lxml/xslt.pxi (original)
+++ lxml/branch/xslt-access-control/src/lxml/xslt.pxi Wed May 17 08:12:57 2006
@@ -17,9 +17,6 @@
class XSLTExtensionError(XSLTError):
pass
-class XSLTAccessDeniedError(XSLTError):
- pass
-
# version information
LIBXSLT_COMPILED_VERSION = __unpackIntVersion(xslt.LIBXSLT_VERSION)
LIBXSLT_VERSION = __unpackIntVersion(xslt.xsltLibxsltVersion)
@@ -140,22 +137,10 @@
################################################################################
# XSLT file/network access control
-cdef object __ACCESS_METHOD_MAP
-__ACCESS_METHOD_MAP = {
- xslt.XSLT_SECPREF_READ_FILE : "check_read_file",
- xslt.XSLT_SECPREF_WRITE_FILE : "check_write_file",
- xslt.XSLT_SECPREF_CREATE_DIRECTORY : "check_create_dir",
- xslt.XSLT_SECPREF_READ_NETWORK : "check_read_network",
- xslt.XSLT_SECPREF_WRITE_NETWORK : "check_write_network",
- }
-
cdef class XSLTAccessControl:
"""Access control for XSLT: reading/writing files, directories and network
- access.
-
- Access is granted to a type of resource via keyword arguments or for
- specific URLs by subclassing and implementing filter methods 'check_*'
- that return a truth value for their URL string argument:
+ access. Access to a type of resource is granted or denied by passing the
+ following keyword arguments. All of them default to True.
* read_file
* write_file
@@ -164,8 +149,8 @@
* write_network
"""
cdef xslt.xsltSecurityPrefs* _prefs
- def __init__(self, read_file=None, write_file=None, create_dir=None,
- read_network=None, write_network=None):
+ def __init__(self, read_file=True, write_file=True, create_dir=True,
+ read_network=True, write_network=True):
self._prefs = xslt.xsltNewSecurityPrefs()
if self._prefs is NULL:
raise XSLTError, "Error preparing access control context"
@@ -181,27 +166,7 @@
cdef _setAccess(self, xslt.xsltSecurityOption option, allow):
cdef xslt.xsltSecurityCheck function
- if allow is None:
- # check if the corresponding method is defined
- method_name = __ACCESS_METHOD_MAP.get(option, None)
- if method_name is None:
- function = xslt.xsltSecurityAllow
- elif hasattr(self, method_name):
- if option == xslt.XSLT_SECPREF_READ_FILE:
- function = _checkFileRead
- elif option == xslt.XSLT_SECPREF_WRITE_FILE:
- function = _checkFileWrite
- elif option == xslt.XSLT_SECPREF_CREATE_DIRECTORY:
- function = _checkDirCreate
- elif option == xslt.XSLT_SECPREF_READ_NETWORK:
- function = _checkNetworkRead
- elif option == xslt.XSLT_SECPREF_WRITE_NETWORK:
- function = _checkNetworkWrite
- else:
- function = xslt.xsltSecurityAllow
- else:
- function = xslt.xsltSecurityAllow
- elif allow:
+ if allow:
function = xslt.xsltSecurityAllow
else:
function = xslt.xsltSecurityForbid
@@ -211,52 +176,6 @@
ctxt._private = self
xslt.xsltSetCtxtSecurityPrefs(self._prefs, ctxt)
-cdef int _checkAccess(xslt.xsltTransformContext* ctxt,
- char* c_value, method_name):
- cdef xmlDoc* c_doc
- cdef XSLTAccessControl access_control
- if ctxt is NULL or ctxt._private is NULL:
- return 1 # no access control => allow everything
- access_control = ctxt._private
- try:
- if c_value is NULL:
- value = None
- else:
- value = c_value
- method = getattr(access_control, method_name, None)
- if method is not None:
- return bool( method(value) )
- else:
- return 1
- except Exception:
- # try to store exception in current resolver context
- c_doc = ctxt.style.doc
- if c_doc is not NULL and c_doc._private is not NULL:
- if isinstance(c_doc._private, _XSLTResolverContext):
- resolver_context = <_XSLTResolverContext>c_doc._private
- resolver_context._store_raised()
- return 0
-
-cdef int _checkFileRead(xslt.xsltSecurityPrefs* sec,
- xslt.xsltTransformContext* ctxt, char* value):
- return _checkAccess(ctxt, value, 'check_read_file')
-
-cdef int _checkFileWrite(xslt.xsltSecurityPrefs* sec,
- xslt.xsltTransformContext* ctxt, char* value):
- return _checkAccess(ctxt, value, 'check_write_file')
-
-cdef int _checkDirCreate(xslt.xsltSecurityPrefs* sec,
- xslt.xsltTransformContext* ctxt, char* value):
- return _checkAccess(ctxt, value, 'check_create_dir')
-
-cdef int _checkNetworkRead(xslt.xsltSecurityPrefs* sec,
- xslt.xsltTransformContext* ctxt, char* value):
- return _checkAccess(ctxt, value, 'check_read_network')
-
-cdef int _checkNetworkWrite(xslt.xsltSecurityPrefs* sec,
- xslt.xsltTransformContext* ctxt, char* value):
- return _checkAccess(ctxt, value, 'check_write_network')
-
################################################################################
# XSLT
@@ -291,7 +210,7 @@
self._extensions = {}
python.PyDict_SetItem(self._extensions, (ns_utf, name_utf), function)
-cdef class _ExsltRegExp # forward declarations
+cdef class _ExsltRegExp # forward declaration
cdef class XSLT:
"""Turn a document into an XSLT object.
From scoder at codespeak.net Wed May 17 08:32:43 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Wed, 17 May 2006 08:32:43 +0200 (CEST)
Subject: [Lxml-checkins] r27318 - in lxml/trunk: . doc src/lxml
Message-ID: <20060517063243.252FC100AB@code0.codespeak.net>
Author: scoder
Date: Wed May 17 08:32:40 2006
New Revision: 27318
Modified:
lxml/trunk/CHANGES.txt
lxml/trunk/doc/resolvers.txt
lxml/trunk/src/lxml/xslt.pxd
lxml/trunk/src/lxml/xslt.pxi
Log:
merged in XSLT access control from xslt-access-control branch
Modified: lxml/trunk/CHANGES.txt
==============================================================================
--- lxml/trunk/CHANGES.txt (original)
+++ lxml/trunk/CHANGES.txt Wed May 17 08:32:40 2006
@@ -7,6 +7,8 @@
Features added
--------------
+* XSLT can block access to file system and network via ``XSLTAccessControl``
+
* ElementTree.write() no longer serializes in memory (reduced memory
footprint)
Modified: lxml/trunk/doc/resolvers.txt
==============================================================================
--- lxml/trunk/doc/resolvers.txt (original)
+++ lxml/trunk/doc/resolvers.txt Wed May 17 08:32:40 2006
@@ -186,3 +186,43 @@
inherits. For XSLT, the output document inherits the resolvers of the input
document and not those of the stylesheet. Therefore, the last result does not
inherit any resolvers at all.
+
+
+I/O access control in XSLT
+--------------------------
+
+XSLT has an additional mechanism to control the access to certain I/O
+operations during the transformation process. This is most interesting where
+XSL scripts come from potentially insecure sources and must be prevented from
+modifying the local file system. Note, however, that there is no way to keep
+them from eating up your precious CPU time, so this should not stop you from
+thinking about what XSLT you execute.
+
+Access control is configured using the XSLTAccessControl class. It can be
+called with a number of keyword arguments that allow or deny specific
+operations::
+
+ >>> transform = etree.XSLT(honk_doc)
+ Resolving url honk:test as prefix honk ... done
+ >>> result = transform(normal_doc)
+ Resolving url hoi:test as prefix honk ... failed
+ Resolving url hoi:test as prefix hoi ... done
+
+ >>> ac = etree.XSLTAccessControl(read_network=False)
+ >>> transform = etree.XSLT(honk_doc, access_control=ac)
+ Resolving url honk:test as prefix honk ... done
+ >>> result = transform(normal_doc)
+ Traceback (most recent call last):
+ [...]
+ XSLTApplyError: runtime error (element 'value-of')
+
+There are a few things to keep in mind:
+
+* XSL parsing (``xsl:import``, etc.) is not affected by this mechanism
+* ``read_file=False`` does not imply ``write_file=False``, all controls are
+ independent.
+* ``read_file`` only applies to files in the file system. Any other scheme
+ for URLs is controlled by the ``*_network`` keywords.
+* If you need more fine-grained control than switching access on and off, you
+ should consider writing a custom document loader that returns empty
+ documents or raises exceptions if access is denied.
Modified: lxml/trunk/src/lxml/xslt.pxd
==============================================================================
--- lxml/trunk/src/lxml/xslt.pxd (original)
+++ lxml/trunk/src/lxml/xslt.pxd Wed May 17 08:32:40 2006
@@ -69,6 +69,33 @@
void* ctxt,
void (*handler)(void* ctxt, char* msg, ...))
+cdef extern from "libxslt/security.h":
+ ctypedef struct xsltSecurityPrefs
+ ctypedef enum xsltSecurityOption:
+ XSLT_SECPREF_READ_FILE = 1
+ XSLT_SECPREF_WRITE_FILE = 2
+ XSLT_SECPREF_CREATE_DIRECTORY = 3
+ XSLT_SECPREF_READ_NETWORK = 4
+ XSLT_SECPREF_WRITE_NETWORK = 5
+
+ ctypedef int (*xsltSecurityCheck)(xsltSecurityPrefs* sec,
+ xsltTransformContext* ctxt,
+ char* value)
+
+ cdef xsltSecurityPrefs* xsltNewSecurityPrefs()
+ cdef void xsltFreeSecurityPrefs(xsltSecurityPrefs* sec)
+ cdef int xsltSecurityForbid(xsltSecurityPrefs* sec,
+ xsltTransformContext* ctxt,
+ char* value)
+ cdef int xsltSecurityAllow(xsltSecurityPrefs* sec,
+ xsltTransformContext* ctxt,
+ char* value)
+ cdef int xsltSetSecurityPrefs(xsltSecurityPrefs* sec,
+ xsltSecurityOption option,
+ xsltSecurityCheck func)
+ cdef int xsltSetCtxtSecurityPrefs(xsltSecurityPrefs* sec,
+ xsltTransformContext* ctxt)
+
cdef extern from "libxslt/extra.h":
cdef char* XSLT_LIBXSLT_NAMESPACE
cdef char* XSLT_XALAN_NAMESPACE
Modified: lxml/trunk/src/lxml/xslt.pxi
==============================================================================
--- lxml/trunk/src/lxml/xslt.pxi (original)
+++ lxml/trunk/src/lxml/xslt.pxi Wed May 17 08:32:40 2006
@@ -26,6 +26,18 @@
################################################################################
+# Where do we store what?
+#
+# xsltStylesheet->doc->_private
+# == _XSLTResolverContext for XSL stylesheet
+#
+# xsltTransformContext->document->doc->_private
+# == _XSLTResolverContext for transformed document
+#
+################################################################################
+
+
+################################################################################
# XSLT document loaders
cdef class _XSLTResolverContext(_ResolverContext):
@@ -122,6 +134,46 @@
xslt.xsltSetLoaderFunc(_doc_loader)
+################################################################################
+# XSLT file/network access control
+
+cdef class XSLTAccessControl:
+ """Access control for XSLT: reading/writing files, directories and network
+ access. Access to a type of resource is granted or denied by passing the
+ following keyword arguments. All of them default to True.
+
+ * read_file
+ * write_file
+ * create_dir
+ * read_network
+ * write_network
+ """
+ cdef xslt.xsltSecurityPrefs* _prefs
+ def __init__(self, read_file=True, write_file=True, create_dir=True,
+ read_network=True, write_network=True):
+ self._prefs = xslt.xsltNewSecurityPrefs()
+ if self._prefs is NULL:
+ raise XSLTError, "Error preparing access control context"
+ self._setAccess(xslt.XSLT_SECPREF_READ_FILE, read_file)
+ self._setAccess(xslt.XSLT_SECPREF_WRITE_FILE, write_file)
+ self._setAccess(xslt.XSLT_SECPREF_CREATE_DIRECTORY, create_dir)
+ self._setAccess(xslt.XSLT_SECPREF_READ_NETWORK, read_network)
+ self._setAccess(xslt.XSLT_SECPREF_WRITE_NETWORK, write_network)
+
+ def __dealloc__(self):
+ if self._prefs is not NULL:
+ xslt.xsltFreeSecurityPrefs(self._prefs)
+
+ cdef _setAccess(self, xslt.xsltSecurityOption option, allow):
+ cdef xslt.xsltSecurityCheck function
+ if allow:
+ function = xslt.xsltSecurityAllow
+ else:
+ function = xslt.xsltSecurityForbid
+ xslt.xsltSetSecurityPrefs(self._prefs, option, function)
+
+ cdef void _register_in_context(self, xslt.xsltTransformContext* ctxt):
+ xslt.xsltSetCtxtSecurityPrefs(self._prefs, ctxt)
################################################################################
# XSLT
@@ -165,10 +217,11 @@
cdef _XSLTContext _context
cdef xslt.xsltStylesheet* _c_style
cdef _XSLTResolverContext _xslt_resolver_context
+ cdef XSLTAccessControl _access_control
cdef _ExsltRegExp _regexp
cdef _ErrorLog _error_log
- def __init__(self, xslt_input, extensions=None, regexp=True):
+ def __init__(self, xslt_input, extensions=None, regexp=True, access_control=None):
cdef xslt.xsltStylesheet* c_style
cdef xmlDoc* c_doc
cdef xmlDoc* fake_c_doc
@@ -178,6 +231,9 @@
doc = _documentOrRaise(xslt_input)
root_node = _rootNodeOf(xslt_input)
+ # set access control or raise TypeError
+ self._access_control = access_control
+
# make a copy of the document as stylesheet parsing modifies it
fake_c_doc = _fakeRootDoc(doc._c_doc, root_node._c_node)
c_doc = tree.xmlCopyDoc(fake_c_doc, 1)
@@ -253,6 +309,9 @@
xslt.xsltSetTransformErrorFunc(transform_ctxt, self._error_log,
_receiveGenericError)
+ if self._access_control is not None:
+ self._access_control._register_in_context(transform_ctxt)
+
ptemp = c_doc._private
c_doc._private = resolver_context
@@ -300,7 +359,13 @@
self._xslt_resolver_context._raise_if_stored()
if c_result is NULL:
- raise XSLTApplyError, "Error applying stylesheet"
+ message = "Error applying stylesheet"
+ errors = self._error_log.filter_from_errors()
+ if errors:
+ error = errors[-1]
+ if error.message:
+ message = error.message
+ raise XSLTApplyError, message
result_doc = _documentFactory(c_result, input_doc._parser)
return _xsltResultTreeFactory(result_doc, self)
@@ -369,6 +434,7 @@
# enable EXSLT support for XSLT
xslt.exsltRegisterAll()
+# extension function lookup for XSLT
cdef xpath.xmlXPathFunction _xslt_function_check(void* ctxt,
char* c_name, char* c_ns_uri):
"Find XSLT extension function from set of XPath and XSLT functions"
From scoder at codespeak.net Wed May 17 08:33:08 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Wed, 17 May 2006 08:33:08 +0200 (CEST)
Subject: [Lxml-checkins] r27319 - lxml/trunk/doc
Message-ID: <20060517063308.8F394100AB@code0.codespeak.net>
Author: scoder
Date: Wed May 17 08:33:07 2006
New Revision: 27319
Modified:
lxml/trunk/doc/api.txt
Log:
small cleanup in api.txt
Modified: lxml/trunk/doc/api.txt
==============================================================================
--- lxml/trunk/doc/api.txt (original)
+++ lxml/trunk/doc/api.txt Wed May 17 08:33:07 2006
@@ -6,7 +6,7 @@
the need to expose a feature in an easy way led to the invention of a new API.
lxml.etree
-==========
+----------
lxml.etree tries to follow the `ElementTree API`_ wherever it can. There are
however some incompatibilities (see compatibility.txt). The extensions are
@@ -14,17 +14,17 @@
.. _`ElementTree API`: http://effbot.org/zone/element-index.htm
-The following examples usually assume this to be executed first::
-
- >>> from lxml import etree
- >>> from StringIO import StringIO
-
If you need to know which version of lxml is installed, you can access the
``lxml.etree.LXML_VERSION`` attribute to retrieve a version tuple. Note,
however, that it did not exist before version 1.0, so you will get an
AttributeError in older versions. The versions of libxml2 and libxslt are
available through the attributes ``LIBXML_VERSION`` and ``LIBXSLT_VERSION``.
+The following examples usually assume this to be executed first::
+
+ >>> from lxml import etree
+ >>> from StringIO import StringIO
+
Parsers
-------
From scoder at codespeak.net Wed May 17 08:34:27 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Wed, 17 May 2006 08:34:27 +0200 (CEST)
Subject: [Lxml-checkins] r27320 - lxml/trunk/src/lxml
Message-ID: <20060517063427.DB204100AB@code0.codespeak.net>
Author: scoder
Date: Wed May 17 08:34:26 2006
New Revision: 27320
Modified:
lxml/trunk/src/lxml/xslt.pxi
Log:
doc updates
Modified: lxml/trunk/src/lxml/xslt.pxi
==============================================================================
--- lxml/trunk/src/lxml/xslt.pxi (original)
+++ lxml/trunk/src/lxml/xslt.pxi Wed May 17 08:34:26 2006
@@ -140,7 +140,7 @@
cdef class XSLTAccessControl:
"""Access control for XSLT: reading/writing files, directories and network
access. Access to a type of resource is granted or denied by passing the
- following keyword arguments. All of them default to True.
+ following keyword arguments. All of them default to True to allow access.
* read_file
* write_file
From scoder at codespeak.net Wed May 17 08:42:43 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Wed, 17 May 2006 08:42:43 +0200 (CEST)
Subject: [Lxml-checkins] r27321 - lxml/trunk/doc
Message-ID: <20060517064243.7FD90100AC@code0.codespeak.net>
Author: scoder
Date: Wed May 17 08:42:41 2006
New Revision: 27321
Modified:
lxml/trunk/doc/resolvers.txt
Log:
doc updates
Modified: lxml/trunk/doc/resolvers.txt
==============================================================================
--- lxml/trunk/doc/resolvers.txt (original)
+++ lxml/trunk/doc/resolvers.txt Wed May 17 08:42:41 2006
@@ -198,7 +198,7 @@
them from eating up your precious CPU time, so this should not stop you from
thinking about what XSLT you execute.
-Access control is configured using the XSLTAccessControl class. It can be
+Access control is configured using the ``XSLTAccessControl`` class. It can be
called with a number of keyword arguments that allow or deny specific
operations::
From scoder at codespeak.net Wed May 17 08:56:03 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Wed, 17 May 2006 08:56:03 +0200 (CEST)
Subject: [Lxml-checkins] r27322 - lxml/trunk/src/lxml
Message-ID: <20060517065603.7C2721006F@code0.codespeak.net>
Author: scoder
Date: Wed May 17 08:56:02 2006
New Revision: 27322
Modified:
lxml/trunk/src/lxml/xslt.pxi
Log:
doc updates
Modified: lxml/trunk/src/lxml/xslt.pxi
==============================================================================
--- lxml/trunk/src/lxml/xslt.pxi (original)
+++ lxml/trunk/src/lxml/xslt.pxi Wed May 17 08:56:02 2006
@@ -139,8 +139,9 @@
cdef class XSLTAccessControl:
"""Access control for XSLT: reading/writing files, directories and network
- access. Access to a type of resource is granted or denied by passing the
- following keyword arguments. All of them default to True to allow access.
+ I/O. Access to a type of resource is granted or denied by passing any of
+ the following keyword arguments. All of them default to True to allow
+ access.
* read_file
* write_file
From scoder at codespeak.net Wed May 17 09:56:17 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Wed, 17 May 2006 09:56:17 +0200 (CEST)
Subject: [Lxml-checkins] r27325 - in lxml/trunk: . doc
Message-ID: <20060517075617.44196100B0@code0.codespeak.net>
Author: scoder
Date: Wed May 17 09:56:15 2006
New Revision: 27325
Added:
lxml/trunk/doc/build.txt
Modified:
lxml/trunk/INSTALL.txt
lxml/trunk/doc/main.txt
Log:
merged in main.txt updates from 0.9.x branch, new doc/build.txt that describes how to build lxml from sources (including static linking on Windows)
Modified: lxml/trunk/INSTALL.txt
==============================================================================
--- lxml/trunk/INSTALL.txt (original)
+++ lxml/trunk/INSTALL.txt Wed May 17 09:56:15 2006
@@ -30,11 +30,13 @@
than the required version above. While there were not any bug reports so far,
you may still encounter certain differences in behaviour in rare cases.
-If you want to build lxml from SVN, you also need Pyrex_. If you are using a
-released version of lxml, it should come with the generated C file in the
-source distribution, so no Pyrex is needed in that case.
+If you want to build lxml from SVN, you also need Pyrex_. Please read `how to
+build lxml from source`_ in this case. If you are using a released version of
+lxml, it should come with the generated C file in the source distribution, so
+no Pyrex is needed in that case.
.. _Pyrex: http://www.cosc.canterbury.ac.nz/~greg/python/Pyrex/
+.. _`how to build lxml from source`: build.html
Note that Pyrex up to and including version 0.9.4 has known problems when
compiling lxml with gcc 4.0 or Python 2.4. Do not use it. If you want to
@@ -59,49 +61,5 @@
This has been reported to work on Linux, MacOS-X 10.4 and Windows, as long as
libxml2 and libxslt are properly installed. To compile and install lxml
-without easy_install, download the source tar-ball, unpack it and type::
-
- python setup.py install
-
-If you do not want to install lxml right away, but first test it from the
-source directory, you can build it in-place like this::
-
- python setup.py build_ext -i
-
-or just::
-
- make
-
-If you then place lxml's "src" directory on your PYTHONPATH somehow, you can
-import lxml.etree and play with it.
-
-
-Running the tests and reporting errors
---------------------------------------
-
-The source distribution (tgz) contains a test suite for lxml. You can run it
-from the top-level directory::
-
- python test.py
-
-Note that the test script only tests the in-place build (see "Installation"
-above), as it searches the "src" directory. You can use the following
-one-step command to trigger an in-place build and test it::
-
- make clean test
-
-To run the ElementTree and cElementTree compatibility tests, make sure
-you have lxml on your PYTHONPATH first, then run::
-
- python selftest.py
-
-and::
-
- python selftest2.py
-
-If the tests give failures, errors, or worse, segmentation faults, we'd really
-like to know. Please contact us on the `mailing list`_, and please specify the
-version of lxml, libxml2, libxslt and Python you were using, as well as your
-operating system type (Linux, Windows, MacOs, ...).
-
-.. _`mailing list`: http://codespeak.net/mailman/listinfo/lxml-dev
+without easy_install, please read `how to build lxml from source`_ (or the
+file ``build.txt`` in the ``doc`` directory of the source tree).
Added: lxml/trunk/doc/build.txt
==============================================================================
--- (empty file)
+++ lxml/trunk/doc/build.txt Wed May 17 09:56:15 2006
@@ -0,0 +1,170 @@
+How to build lxml from source
+=============================
+
+To build lxml from source, you need libxml2 and libxslt properly installed.
+
+Pyrex
+-----
+
+The lxml.etree module is written in Pyrex_. To build lxml from source, you
+therefore need a working Pyrex installation. Pyrex now supports EasyInstall,
+so you can install it by running the following command as super-user::
+
+ easy_install Pyrex
+
+.. _Pyrex: http://www.cosc.canterbury.ac.nz/~greg/python/Pyrex/
+.. _easy_install: http://peak.telecommunity.com/DevCenter/EasyInstall
+
+Note that Pyrex up to and including version 0.9.4 has known problems when
+compiling lxml with gcc 4.0 or Python 2.4. Do not use it. If you want to
+build lxml from non-release sources, please install Pyrex version 0.9.4.1 or
+later.
+
+
+Subversion
+----------
+
+The lxml package is developed in a Subversion repository. You can retrieve
+the current developer version by calling::
+
+ svn co http://codespeak.net/svn/lxml/trunk lxml
+
+This will create a directory ``lxml`` and download the source into it. You
+can also `browse the repository through the web`_ or use your favourite SVN
+client to access it.
+
+.. _`browse the repository through the web`: http://codespeak.net/svn/lxml
+
+
+The distutils approach
+----------------------
+
+Usually, building lxml is done through distutils. Do a Subversion checkout
+(or download the source tar-ball and unpack it) and then type::
+
+ python setup.py build
+
+If you want to test lxml from the source directory, it is better to build it
+in-place like this::
+
+ python setup.py build_ext -i
+
+or, in Unix-like environments::
+
+ make
+
+If you then place lxml's "src" directory on your PYTHONPATH somehow, you can
+import ``lxml.etree`` and play with it.
+
+
+Running the tests and reporting errors
+--------------------------------------
+
+The source distribution (tgz) contains a test suite for lxml. You can run it
+from the top-level directory::
+
+ python test.py
+
+Note that the test script only tests the in-place build (see "Installation"
+above), as it searches the "src" directory. You can use the following
+one-step command to trigger an in-place build and test it::
+
+ make clean test
+
+To run the ElementTree and cElementTree compatibility tests, make sure
+you have lxml on your PYTHONPATH first, then run::
+
+ python selftest.py
+
+and::
+
+ python selftest2.py
+
+If the tests give failures, errors, or worse, segmentation faults, we'd really
+like to know. Please contact us on the `mailing list`_, and please specify the
+version of lxml, libxml2, libxslt and Python you were using, as well as your
+operating system type (Linux, Windows, MacOs, ...).
+
+.. _`mailing list`: http://codespeak.net/mailman/listinfo/lxml-dev
+
+
+Static linking on Windows
+-------------------------
+
+Most operating systems have proper package mangement that makes installing
+current versions of libxml2 and libxslt easy. However, Microsoft Windows
+lacks these capabilities. It can therefore be interesting to statically link
+the external libraries into lxml.etree to avoid having to install them
+separately. `David Sankel`_ proposed the following approach.
+
+.. _`David Sankel`: http://codespeak.net/pipermail/lxml-dev/2006-May/001196.html
+
+Download lxml and all required libraries to the same directory. The iconv,
+libxml2, libxslt, and zlib libraries are all availible from xmlsoft.org. The
+place to go on the ftp site is ftp://xmlsoft.org/libxml2/win32.
+
+Your directory should now have something like the following files in it::
+
+iconv-1.9.1.win32.zip
+libxml2-2.6.23.win32.zip
+libxslt-1.1.15.win32.zip
+lxml-0.9.2.tgz
+zlib-1.2.3.win32.zip
+
+Now extract each of those files in the _same_ directory. Now you should have
+something like this::
+
+iconv-1.9.1.win32/
+iconv-1.9.1.win32.zip
+libxml2-2.6.23.win32/
+libxml2-2.6.23.win32.zip
+libxslt-1.1.15.win32/
+libxslt-1.1.15.win32.zip
+lxml-0.9.2/
+lxml-0.9.2.tgz
+zlib-1.2.3.win32/
+zlib-1.2.3.win32.zip
+
+Go to the lxml-0.9.2 directory and edit the Makefile. There should be a
+section that looks like this::
+
+ ext_modules = [ Extension(
+ "lxml.etree",
+ sources = sources,
+ extra_compile_args = ['-w'] + flags('xslt-config --cflags'),
+ extra_link_args = flags('xslt-config --libs')
+ )],
+
+The problem here is that the Windows version of libxslt does not install the
+little program ``xslt-config``, which would normally auto-configure the build
+process.
+
+Change this section to something like this, but take care to use the correct
+version numbers::
+
+ ext_modules = [ Extension(
+ "lxml.etree",
+ sources = sources,
+ extra_compile_args = ['-w'] + [
+ "-I..\\libxml2-2.6.23.win32\\include",
+ "-I..\\libxslt-1.1.15.win32\\include",
+ "-I..\\zlib-1.2.3.win32\\include",
+ "-I..\\iconv-1.9.1.win32\\include"
+ ],
+ extra_link_args = [
+ "..\\libxml2-2.6.23.win32\\lib\\libxml2_a.lib",
+ "..\\libxslt-1.1.15.win32\\lib\\libxslt_a.lib",
+ "..\\zlib-1.2.3.win32\\lib\\zlib.lib",
+ "..\\iconv-1.9.1.win32\\lib\\iconv_a.lib"
+ ]
+ )],
+
+The ``_a`` part of the library names means that we are linking statically
+against the named library files. If you want to use DLLs, you need to link
+against the DLL version of the libraries.
+
+Now you should be able to use setup.py and everything should work well. Try calling::
+
+ python setup.py bdist_wininst
+
+This will create a Windows installer in the ``pkg`` directory.
Modified: lxml/trunk/doc/main.txt
==============================================================================
--- lxml/trunk/doc/main.txt (original)
+++ lxml/trunk/doc/main.txt Wed May 17 09:56:15 2006
@@ -13,49 +13,65 @@
.. _introduction: intro.html
-News
-----
+Download
+--------
-* 2006-03-30: `lxml 0.9.1`_ released (`changes for 0.9.1`_)
+The best way to download binary versions is to visit `lxml at the Python
+cheeseshop`_. It has the source, eggs and installers for various platforms.
-* 2006-03-20: `lxml 0.9`_ released (`changes for 0.9`_)
+.. _`lxml at the Python cheeseshop`: http://cheeseshop.python.org/pypi/lxml/
-* 2005-11-03: `lxml 0.8`_ released (`changes for 0.8`_)
+Please take a look at the `installation instructions`_!
-* 2005-06-15: `lxml 0.7`_ released (`changes for 0.7`_)
+.. _`installation instructions`: installation.html
-* 2005-05-14: `lxml 0.6`_ released (`changes for 0.6`_)
+* `lxml 0.9.2`_, released 2006-05-10 (`changes for 0.9.2`_)
-* 2005-04-09: `lxml 0.5.1`_ released (`changes for 0.5.1`_)
+* `lxml 0.9.1`_, released 2006-03-30 (`changes for 0.9.1`_)
-* 2005-04-08: `lxml 0.5`_ released!
+* `lxml 0.9`_, released 2006-03-20 (`changes for 0.9`_)
-.. _`lxml 0.9.1`: lxml-0.9.1.tgz
+* `lxml 0.8`_, released 2005-11-03 (`changes for 0.8`_)
-.. _`lxml 0.9`: lxml-0.9.tgz
+* `lxml 0.7`_, released 2005-06-15 (`changes for 0.7`_)
-.. _`lxml 0.8`: lxml-0.8.tgz
+* `lxml 0.6`_, released 2005-05-14 (`changes for 0.6`_)
-.. _`lxml 0.7`: lxml-0.7.tgz
+* `lxml 0.5.1`_, released 2005-04-09 (`changes for 0.5.1`_)
-.. _`lxml 0.6`: lxml-0.6.tgz
+* `lxml 0.5`_, released 2005-04-08
+.. _`lxml 0.9.2`: lxml-0.9.2.tgz
+.. _`lxml 0.9.1`: lxml-0.9.1.tgz
+.. _`lxml 0.9`: lxml-0.9.tgz
+.. _`lxml 0.8`: lxml-0.8.tgz
+.. _`lxml 0.7`: lxml-0.7.tgz
+.. _`lxml 0.6`: lxml-0.6.tgz
.. _`lxml 0.5.1`: lxml-0.5.1.tgz
-
.. _`lxml 0.5`: lxml-0.5.tgz
+.. _`CHANGES for 0.9.2`: changes-0.9.2.html
.. _`CHANGES for 0.9.1`: changes-0.9.1.html
-
.. _`CHANGES for 0.9`: changes-0.9.html
-
.. _`CHANGES for 0.8`: changes-0.8.html
-
.. _`CHANGES for 0.7`: changes-0.7.html
-
.. _`CHANGES for 0.6`: changes-0.6.html
-
.. _`CHANGES for 0.5.1`: changes-0.5.1.html
+It's also possible to check out the latest development version of lxml
+from svn directly, using a command like this::
+
+ svn co http://codespeak.net/svn/lxml/trunk lxml
+
+You can also `browse it through the web`_. Please read `how to build lxml
+from source`_ first. The `latest CHANGES`_ of the developer version are also
+accessible. You can check there if a bug you found has been fixed or a
+feature you want has been implemented in the latest trunk version.
+
+.. _`how to build lxml from source`: build.html
+.. _`browse it through the web`: http://codespeak.net/svn/lxml
+.. _`latest CHANGES`: http://codespeak.net/svn/lxml/trunk/CHANGES.txt
+
Documentation
-------------
@@ -76,25 +92,15 @@
in the standard library.
.. _`ElementTree API`: http://effbot.org/zone/element-index.htm
-
.. _`ElementTree compatibility overview`: compatibility.html
-
.. _`extends this API`: api.html
-
.. _`extension functions`: extensions.html
-
.. _XPath: http://www.w3.org/TR/xpath
-
.. _`Relax NG`: http://www.relaxng.org/
-
.. _`XML Schema`: http://www.w3.org/XML/Schema
-
.. _`XSLT`: http://www.w3.org/TR/xslt
-
.. _`c14n`: http://www.w3.org/TR/2001/REC-xml-c14n-20010315
-
.. _`implementing namespaces`: namespace_extensions.html
-
.. _`SAX compliant API`: sax.html
Mailing list
@@ -104,46 +110,6 @@
.. _`mailing list`: http://codespeak.net/mailman/listinfo/lxml-dev
-Download
---------
-
-* `lxml 0.9.1`_ (2006-03-30)
-
-* `lxml 0.9`_ (2006-03-20)
-
-* `lxml 0.8`_ (2005-11-03)
-
-* `lxml 0.7`_ (2005-06-15)
-
-* `lxml 0.6`_ (2005-05-14)
-
-* `lxml 0.5.1`_ (2005-04-09)
-
-* `lxml 0.5`_ (2005-04-08)
-
-Instead of downloading the source here, you can also find `lxml at the
-Python cheeseshop`_ in source, egg and installer form for various
-platforms.
-
-.. _`lxml at the Python cheeseshop`: http://cheeseshop.python.org/pypi/lxml/
-
-See also the `installation instructions`_.
-
-.. _`installation instructions`: installation.html
-
-It's also possible to check out the latest development version of lxml
-from svn directly, using a command like this::
-
- svn co http://codespeak.net/svn/lxml/trunk lxml
-
-You can also `browse it through the web`_. The `latest CHANGES`_ of the
-developer version are also accessible. You can check there if a bug you found
-has been fixed or a feature you want has been implemented in the latest trunk
-version.
-
-.. _`browse it through the web`: http://codespeak.net/svn/lxml
-.. _`latest CHANGES`: http://codespeak.net/svn/lxml/trunk/CHANGES.txt
-
License
-------
From scoder at codespeak.net Wed May 17 10:00:48 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Wed, 17 May 2006 10:00:48 +0200 (CEST)
Subject: [Lxml-checkins] r27326 - in lxml/branch/lxml-0.9.x: . doc
Message-ID: <20060517080048.2171B100B0@code0.codespeak.net>
Author: scoder
Date: Wed May 17 10:00:46 2006
New Revision: 27326
Added:
lxml/branch/lxml-0.9.x/doc/build.txt
- copied unchanged from r27325, lxml/trunk/doc/build.txt
Modified:
lxml/branch/lxml-0.9.x/INSTALL.txt
lxml/branch/lxml-0.9.x/doc/main.txt
Log:
merged in build.txt from trunk
Modified: lxml/branch/lxml-0.9.x/INSTALL.txt
==============================================================================
--- lxml/branch/lxml-0.9.x/INSTALL.txt (original)
+++ lxml/branch/lxml-0.9.x/INSTALL.txt Wed May 17 10:00:46 2006
@@ -26,11 +26,13 @@
than the required version above. While there were not any bug reports so far,
you may still encounter certain differences in behaviour in rare cases.
-If you want to build lxml from SVN, you also need Pyrex_. If you are using a
-released version of lxml, it should come with the generated C file in the
-source distribution, so no Pyrex is needed in that case.
+If you want to build lxml from SVN, you also need Pyrex_. Please read `how to
+build lxml from source`_ in this case. If you are using a released version of
+lxml, it should come with the generated C file in the source distribution, so
+no Pyrex is needed in that case.
.. _Pyrex: http://www.cosc.canterbury.ac.nz/~greg/python/Pyrex/
+.. _`how to build lxml from source`: build.html
Note that Pyrex up to and including version 0.9.4 has known problems when
compiling lxml with gcc 4.0 or Python 2.4. Do not use it. If you want to
@@ -55,49 +57,5 @@
This has been reported to work on Linux, MacOS-X 10.4 and Windows, as long as
libxml2 and libxslt are properly installed. To compile and install lxml
-without easy_install, download the source tar-ball, unpack it and type::
-
- python setup.py install
-
-If you do not want to install lxml right away, but first test it from the
-source directory, you can build it in-place like this::
-
- python setup.py build_ext -i
-
-or just::
-
- make
-
-If you then place lxml's "src" directory on your PYTHONPATH somehow, you can
-import lxml.etree and play with it.
-
-
-Running the tests and reporting errors
---------------------------------------
-
-The source distribution (tgz) contains a test suite for lxml. You can run it
-from the top-level directory::
-
- python test.py
-
-Note that the test script only tests the in-place build (see "Installation"
-above), as it searches the "src" directory. You can use the following
-one-step command to trigger an in-place build and test it::
-
- make clean test
-
-To run the ElementTree and cElementTree compatibility tests, make sure
-you have lxml on your PYTHONPATH first, then run::
-
- python selftest.py
-
-and::
-
- python selftest2.py
-
-If the tests give failures, errors, or worse, segmentation faults, we'd really
-like to know. Please contact us on the `mailing list`_, and please specify the
-version of lxml, libxml2, libxslt and Python you were using, as well as your
-operating system type (Linux, Windows, MacOs, ...).
-
-.. _`mailing list`: http://codespeak.net/mailman/listinfo/lxml-dev
+without easy_install, please read `how to build lxml from source`_ (or the
+file ``build.txt`` in the ``doc`` directory of the source tree).
Modified: lxml/branch/lxml-0.9.x/doc/main.txt
==============================================================================
--- lxml/branch/lxml-0.9.x/doc/main.txt (original)
+++ lxml/branch/lxml-0.9.x/doc/main.txt Wed May 17 10:00:46 2006
@@ -63,11 +63,12 @@
svn co http://codespeak.net/svn/lxml/trunk lxml
-You can also `browse it through the web`_. The `latest CHANGES`_ of the
-developer version are also accessible. You can check there if a bug you found
-has been fixed or a feature you want has been implemented in the latest trunk
-version.
+You can also `browse it through the web`_. Please read `how to build lxml
+from source`_ first. The `latest CHANGES`_ of the developer version are also
+accessible. You can check there if a bug you found has been fixed or a
+feature you want has been implemented in the latest trunk version.
+.. _`how to build lxml from source`: build.html
.. _`browse it through the web`: http://codespeak.net/svn/lxml
.. _`latest CHANGES`: http://codespeak.net/svn/lxml/trunk/CHANGES.txt
From scoder at codespeak.net Wed May 17 10:06:33 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Wed, 17 May 2006 10:06:33 +0200 (CEST)
Subject: [Lxml-checkins] r27327 - lxml/trunk/doc
Message-ID: <20060517080633.D6D9A100B0@code0.codespeak.net>
Author: scoder
Date: Wed May 17 10:06:32 2006
New Revision: 27327
Modified:
lxml/trunk/doc/build.txt
Log:
typos etc.
Modified: lxml/trunk/doc/build.txt
==============================================================================
--- lxml/trunk/doc/build.txt (original)
+++ lxml/trunk/doc/build.txt Wed May 17 10:06:32 2006
@@ -53,7 +53,7 @@
make
-If you then place lxml's "src" directory on your PYTHONPATH somehow, you can
+If you then place lxml's ``src`` directory on your PYTHONPATH somehow, you can
import ``lxml.etree`` and play with it.
@@ -66,7 +66,7 @@
python test.py
Note that the test script only tests the in-place build (see "Installation"
-above), as it searches the "src" directory. You can use the following
+above), as it searches the ``src`` directory. You can use the following
one-step command to trigger an in-place build and test it::
make clean test
@@ -91,7 +91,7 @@
Static linking on Windows
-------------------------
-Most operating systems have proper package mangement that makes installing
+Most operating systems have proper package management that makes installing
current versions of libxml2 and libxslt easy. However, Microsoft Windows
lacks these capabilities. It can therefore be interesting to statically link
the external libraries into lxml.etree to avoid having to install them
@@ -100,7 +100,7 @@
.. _`David Sankel`: http://codespeak.net/pipermail/lxml-dev/2006-May/001196.html
Download lxml and all required libraries to the same directory. The iconv,
-libxml2, libxslt, and zlib libraries are all availible from xmlsoft.org. The
+libxml2, libxslt, and zlib libraries are all available from xmlsoft.org. The
place to go on the ftp site is ftp://xmlsoft.org/libxml2/win32.
Your directory should now have something like the following files in it::
From scoder at codespeak.net Wed May 17 10:06:59 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Wed, 17 May 2006 10:06:59 +0200 (CEST)
Subject: [Lxml-checkins] r27328 - lxml/branch/lxml-0.9.x/doc
Message-ID: <20060517080659.B6706100B3@code0.codespeak.net>
Author: scoder
Date: Wed May 17 10:06:58 2006
New Revision: 27328
Modified:
lxml/branch/lxml-0.9.x/doc/build.txt
Log:
merged in doc fixes from trunk
Modified: lxml/branch/lxml-0.9.x/doc/build.txt
==============================================================================
--- lxml/branch/lxml-0.9.x/doc/build.txt (original)
+++ lxml/branch/lxml-0.9.x/doc/build.txt Wed May 17 10:06:58 2006
@@ -53,7 +53,7 @@
make
-If you then place lxml's "src" directory on your PYTHONPATH somehow, you can
+If you then place lxml's ``src`` directory on your PYTHONPATH somehow, you can
import ``lxml.etree`` and play with it.
@@ -66,7 +66,7 @@
python test.py
Note that the test script only tests the in-place build (see "Installation"
-above), as it searches the "src" directory. You can use the following
+above), as it searches the ``src`` directory. You can use the following
one-step command to trigger an in-place build and test it::
make clean test
@@ -91,7 +91,7 @@
Static linking on Windows
-------------------------
-Most operating systems have proper package mangement that makes installing
+Most operating systems have proper package management that makes installing
current versions of libxml2 and libxslt easy. However, Microsoft Windows
lacks these capabilities. It can therefore be interesting to statically link
the external libraries into lxml.etree to avoid having to install them
@@ -100,7 +100,7 @@
.. _`David Sankel`: http://codespeak.net/pipermail/lxml-dev/2006-May/001196.html
Download lxml and all required libraries to the same directory. The iconv,
-libxml2, libxslt, and zlib libraries are all availible from xmlsoft.org. The
+libxml2, libxslt, and zlib libraries are all available from xmlsoft.org. The
place to go on the ftp site is ftp://xmlsoft.org/libxml2/win32.
Your directory should now have something like the following files in it::
From scoder at codespeak.net Wed May 17 10:12:07 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Wed, 17 May 2006 10:12:07 +0200 (CEST)
Subject: [Lxml-checkins] r27329 - lxml/trunk/doc
Message-ID: <20060517081207.29787100B3@code0.codespeak.net>
Author: scoder
Date: Wed May 17 10:12:06 2006
New Revision: 27329
Modified:
lxml/trunk/doc/build.txt
Log:
doc fixes
Modified: lxml/trunk/doc/build.txt
==============================================================================
--- lxml/trunk/doc/build.txt (original)
+++ lxml/trunk/doc/build.txt Wed May 17 10:12:06 2006
@@ -125,8 +125,8 @@
zlib-1.2.3.win32/
zlib-1.2.3.win32.zip
-Go to the lxml-0.9.2 directory and edit the Makefile. There should be a
-section that looks like this::
+Go to the lxml-0.9.2 directory and edit the file ``setup.py``. There should be
+a section that looks like this::
ext_modules = [ Extension(
"lxml.etree",
From scoder at codespeak.net Wed May 17 10:12:28 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Wed, 17 May 2006 10:12:28 +0200 (CEST)
Subject: [Lxml-checkins] r27330 - lxml/branch/lxml-0.9.x/doc
Message-ID: <20060517081228.DEBA2100B3@code0.codespeak.net>
Author: scoder
Date: Wed May 17 10:12:27 2006
New Revision: 27330
Modified:
lxml/branch/lxml-0.9.x/doc/build.txt
Log:
merged in doc fixes from trunk
Modified: lxml/branch/lxml-0.9.x/doc/build.txt
==============================================================================
--- lxml/branch/lxml-0.9.x/doc/build.txt (original)
+++ lxml/branch/lxml-0.9.x/doc/build.txt Wed May 17 10:12:27 2006
@@ -125,8 +125,8 @@
zlib-1.2.3.win32/
zlib-1.2.3.win32.zip
-Go to the lxml-0.9.2 directory and edit the Makefile. There should be a
-section that looks like this::
+Go to the lxml-0.9.2 directory and edit the file ``setup.py``. There should be
+a section that looks like this::
ext_modules = [ Extension(
"lxml.etree",
From scoder at codespeak.net Wed May 17 10:53:11 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Wed, 17 May 2006 10:53:11 +0200 (CEST)
Subject: [Lxml-checkins] r27331 - lxml/trunk/doc
Message-ID: <20060517085311.8551010050@code0.codespeak.net>
Author: scoder
Date: Wed May 17 10:53:10 2006
New Revision: 27331
Modified:
lxml/trunk/doc/build.txt
Log:
doc fixes
Modified: lxml/trunk/doc/build.txt
==============================================================================
--- lxml/trunk/doc/build.txt (original)
+++ lxml/trunk/doc/build.txt Wed May 17 10:53:10 2006
@@ -65,9 +65,9 @@
python test.py
-Note that the test script only tests the in-place build (see "Installation"
-above), as it searches the ``src`` directory. You can use the following
-one-step command to trigger an in-place build and test it::
+Note that the test script only tests the in-place build (see distutils
+building above), as it searches the ``src`` directory. You can use the
+following one-step command to trigger an in-place build and test it::
make clean test
From scoder at codespeak.net Wed May 17 10:53:35 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Wed, 17 May 2006 10:53:35 +0200 (CEST)
Subject: [Lxml-checkins] r27332 - lxml/branch/lxml-0.9.x/doc
Message-ID: <20060517085335.8AAAB10050@code0.codespeak.net>
Author: scoder
Date: Wed May 17 10:53:34 2006
New Revision: 27332
Modified:
lxml/branch/lxml-0.9.x/doc/build.txt
Log:
merged in doc fixes from trunk
Modified: lxml/branch/lxml-0.9.x/doc/build.txt
==============================================================================
--- lxml/branch/lxml-0.9.x/doc/build.txt (original)
+++ lxml/branch/lxml-0.9.x/doc/build.txt Wed May 17 10:53:34 2006
@@ -65,9 +65,9 @@
python test.py
-Note that the test script only tests the in-place build (see "Installation"
-above), as it searches the ``src`` directory. You can use the following
-one-step command to trigger an in-place build and test it::
+Note that the test script only tests the in-place build (see distutils
+building above), as it searches the ``src`` directory. You can use the
+following one-step command to trigger an in-place build and test it::
make clean test
From scoder at codespeak.net Wed May 17 11:13:48 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Wed, 17 May 2006 11:13:48 +0200 (CEST)
Subject: [Lxml-checkins] r27333 - lxml/trunk/src/lxml/tests
Message-ID: <20060517091348.51C92100A8@code0.codespeak.net>
Author: scoder
Date: Wed May 17 11:13:47 2006
New Revision: 27333
Modified:
lxml/trunk/src/lxml/tests/test_etree.py
Log:
test case for Element.append(None) - could crash in 0.9
Modified: lxml/trunk/src/lxml/tests/test_etree.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_etree.py (original)
+++ lxml/trunk/src/lxml/tests/test_etree.py Wed May 17 11:13:47 2006
@@ -119,7 +119,12 @@
self.assertRaises(TypeError,
a.__setitem__, 0, 'foo')
-
+
+ def test_append_None(self):
+ # raises AssertionError in ElementTree
+ Element = self.etree.Element
+ self.assertRaises(TypeError, Element('a').append, None)
+
# gives error in ElementTree
def test_comment_empty(self):
Element = self.etree.Element
From scoder at codespeak.net Wed May 17 11:19:38 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Wed, 17 May 2006 11:19:38 +0200 (CEST)
Subject: [Lxml-checkins] r27334 - in lxml/trunk: doc src/lxml src/lxml/tests
Message-ID: <20060517091938.71F87100A8@code0.codespeak.net>
Author: scoder
Date: Wed May 17 11:19:36 2006
New Revision: 27334
Modified:
lxml/trunk/doc/compatibility.txt
lxml/trunk/src/lxml/etree.pyx
lxml/trunk/src/lxml/tests/test_elementtree.py
lxml/trunk/src/lxml/tests/test_etree.py
Log:
prefer TypeError over AssertionError when passing None into API functions
Modified: lxml/trunk/doc/compatibility.txt
==============================================================================
--- lxml/trunk/doc/compatibility.txt (original)
+++ lxml/trunk/doc/compatibility.txt Wed May 17 11:19:36 2006
@@ -86,7 +86,9 @@
* When trying to set a subelement using __setitem__ that is in fact not an
Element but some other object, etree raises a TypeError, and ElementTree
- raises an AssertionError.
+ raises an AssertionError. This also applies to some other places of the
+ API. In general, etree tries to avoid AssertionErrors in favour of being
+ more specific about the reason for the exception.
* ElementTree ignores comments when parsing XML, while etree will read them in
and treat them as Comment elements.
Modified: lxml/trunk/src/lxml/etree.pyx
==============================================================================
--- lxml/trunk/src/lxml/etree.pyx (original)
+++ lxml/trunk/src/lxml/etree.pyx Wed May 17 11:19:36 2006
@@ -1402,16 +1402,12 @@
def iselement(element):
return isinstance(element, _Element)
-def dump(_NodeBase elem):
- assert elem is not None, "Must supply element."
- # better, but not ET compatible : "_NodeBase elem not None"
+def dump(_NodeBase elem not None):
_dumpToFile(sys.stdout, elem._doc._c_doc, elem._c_node)
def tostring(element_or_tree, encoding='us-ascii', xml_declaration=None):
"Serialize an element to an encoded string representation of its XML tree."
cdef int write_declaration
- assert element_or_tree is not None # for ElementTree compatibility only
-
encoding = str(encoding)
if xml_declaration is None:
# by default, write an XML declaration only for non-standard encodings
@@ -1435,7 +1431,6 @@
Note that the result does not carry an XML encoding declaration and is
therefore not necessarily suited for serialization without further
treatment."""
- assert element_or_tree is not None # for ElementTree compatibility only
if isinstance(element_or_tree, _NodeBase):
return _tounicode(<_NodeBase>element_or_tree)
elif isinstance(element_or_tree, _ElementTree):
Modified: lxml/trunk/src/lxml/tests/test_elementtree.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_elementtree.py (original)
+++ lxml/trunk/src/lxml/tests/test_elementtree.py Wed May 17 11:19:36 2006
@@ -1601,10 +1601,6 @@
canonicalize(tostring(b)))
self.assertEquals('<c></c>',
canonicalize(tostring(c)))
-
- def test_tostring_none(self):
- tostring = self.etree.tostring
- self.assertRaises(AssertionError, self.etree.tostring, None)
def test_tostring_element_tail(self):
tostring = self.etree.tostring
Modified: lxml/trunk/src/lxml/tests/test_etree.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_etree.py (original)
+++ lxml/trunk/src/lxml/tests/test_etree.py Wed May 17 11:19:36 2006
@@ -155,7 +155,7 @@
# test passing 'None' to dump
def test_dump_none(self):
- self.assertRaises(AssertionError, etree.dump, None)
+ self.assertRaises(TypeError, etree.dump, None)
def test_prefix(self):
ElementTree = self.etree.ElementTree
@@ -430,6 +430,11 @@
self.assertEquals(docinfo.root_name, 'html')
self.assertEquals(docinfo.doctype, '')
+ def test_tostring_none(self):
+ # ElementTree raises an AssertionError here
+ tostring = self.etree.tostring
+ self.assertRaises(TypeError, self.etree.tostring, None)
+
def test_tounicode(self):
tounicode = self.etree.tounicode
Element = self.etree.Element
@@ -461,7 +466,7 @@
def test_tounicode_none(self):
tounicode = self.etree.tounicode
- self.assertRaises(AssertionError, self.etree.tounicode, None)
+ self.assertRaises(TypeError, self.etree.tounicode, None)
def test_tounicode_element_tail(self):
tounicode = self.etree.tounicode
From scoder at codespeak.net Wed May 17 11:21:06 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Wed, 17 May 2006 11:21:06 +0200 (CEST)
Subject: [Lxml-checkins] r27335 - lxml/trunk/src/lxml
Message-ID: <20060517092106.E3333100A8@code0.codespeak.net>
Author: scoder
Date: Wed May 17 11:21:05 2006
New Revision: 27335
Modified:
lxml/trunk/src/lxml/etree.pyx
Log:
set lxml.etree.__version__ to LXML_VERSION_STRING
Modified: lxml/trunk/src/lxml/etree.pyx
==============================================================================
--- lxml/trunk/src/lxml/etree.pyx (original)
+++ lxml/trunk/src/lxml/etree.pyx Wed May 17 11:21:05 2006
@@ -84,6 +84,8 @@
LIBXML_COMPILED_VERSION = __unpackIntVersion(tree.LIBXML_VERSION)
LXML_VERSION = __unpackDottedVersion(tree.LXML_VERSION_STRING)
+__version__ = tree.LXML_VERSION_STRING
+
# class for temporary storage of Python references
cdef class _TempStore:
From scoder at codespeak.net Wed May 17 11:32:39 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Wed, 17 May 2006 11:32:39 +0200 (CEST)
Subject: [Lxml-checkins] r27336 - lxml/branch/lxml-0.9.x/src/lxml
Message-ID: <20060517093239.A7B4D10097@code0.codespeak.net>
Author: scoder
Date: Wed May 17 11:32:38 2006
New Revision: 27336
Modified:
lxml/branch/lxml-0.9.x/src/lxml/etree.pyx
Log:
fix exception raising from _raiseIfNone
Modified: lxml/branch/lxml-0.9.x/src/lxml/etree.pyx
==============================================================================
--- lxml/branch/lxml-0.9.x/src/lxml/etree.pyx (original)
+++ lxml/branch/lxml-0.9.x/src/lxml/etree.pyx Wed May 17 11:32:38 2006
@@ -1300,7 +1300,7 @@
# Private helper functions
-cdef void _raiseIfNone(el):
+cdef _raiseIfNone(el):
if el is None:
raise TypeError, "Argument must not be None."
From scoder at codespeak.net Wed May 17 11:36:06 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Wed, 17 May 2006 11:36:06 +0200 (CEST)
Subject: [Lxml-checkins] r27337 - lxml/trunk/src/lxml/tests
Message-ID: <20060517093606.8A0291009C@code0.codespeak.net>
Author: scoder
Date: Wed May 17 11:36:04 2006
New Revision: 27337
Modified:
lxml/trunk/src/lxml/tests/test_elementtree.py
lxml/trunk/src/lxml/tests/test_etree.py
Log:
moved tostring(UTF-16) test case to test_etree as it fails in ElementTree
Modified: lxml/trunk/src/lxml/tests/test_elementtree.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_elementtree.py (original)
+++ lxml/trunk/src/lxml/tests/test_elementtree.py Wed May 17 11:36:04 2006
@@ -1723,19 +1723,6 @@
a.text = u'Søk på nettet'
self.assert_(tostring(a, 'UTF-8') in [xml, prologue + xml])
- def test_encoding_tostring_utf16(self):
- tostring = self.etree.tostring
- Element = self.etree.Element
- SubElement = self.etree.SubElement
-
- a = Element('a')
- b = SubElement(a, 'b')
- c = SubElement(a, 'c')
-
- result = unicode(tostring(a, 'UTF-16'), 'UTF-16')
- self.assertEquals('<a><b></b><c></c></a>',
- canonicalize(result))
-
def test_encoding_tostring_sub(self):
Element = self.etree.Element
SubElement = self.etree.SubElement
Modified: lxml/trunk/src/lxml/tests/test_etree.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_etree.py (original)
+++ lxml/trunk/src/lxml/tests/test_etree.py Wed May 17 11:36:04 2006
@@ -430,6 +430,20 @@
self.assertEquals(docinfo.root_name, 'html')
self.assertEquals(docinfo.doctype, '')
+ def test_encoding_tostring_utf16(self):
+ # ElementTree fails to serialize this
+ tostring = self.etree.tostring
+ Element = self.etree.Element
+ SubElement = self.etree.SubElement
+
+ a = Element('a')
+ b = SubElement(a, 'b')
+ c = SubElement(a, 'c')
+
+ result = unicode(tostring(a, 'UTF-16'), 'UTF-16')
+ self.assertEquals('<a><b></b><c></c></a>',
+ canonicalize(result))
+
def test_tostring_none(self):
# ElementTree raises an AssertionError here
tostring = self.etree.tostring
From scoder at codespeak.net Wed May 17 12:13:58 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Wed, 17 May 2006 12:13:58 +0200 (CEST)
Subject: [Lxml-checkins] r27338 - lxml/trunk
Message-ID: <20060517101358.A3D2A10050@code0.codespeak.net>
Author: scoder
Date: Wed May 17 12:13:57 2006
New Revision: 27338
Modified:
lxml/trunk/setup.py
Log:
cleanup in setup.py
Modified: lxml/trunk/setup.py
==============================================================================
--- lxml/trunk/setup.py (original)
+++ lxml/trunk/setup.py Wed May 17 12:13:57 2006
@@ -12,7 +12,7 @@
except IOError:
svn_version = version
else:
- revision = re.search("<entry[^>]*name=\"\"[^>]*revision=\"([^\"]+)\"",
+ revision = re.search('<entry[^>]*name=""[^>]*revision="([^"]+)"',
svn_entries).group(1)
svn_version = version + '-' + revision
@@ -47,22 +47,19 @@
sources = ["src/lxml/etree.c"]
try:
- changelog = open("CHANGES.txt", 'r')
+ changelog = open(os.path.join(src_dir, "CHANGES.txt"), 'r')
except:
print "*NOTE*: couldn't open CHANGES.txt !"
else:
- inside = 0
changelog_lines = []
for line in changelog:
if line.startswith('====='):
- inside += 1
- if inside > 3:
+ if len(changelog_lines) > 1:
break
- if inside > 1:
+ if changelog_lines:
changelog_lines.append(line)
elif version in line:
changelog_lines.append(line)
- inside += 1
if changelog_lines:
changelog_text = ''.join(changelog_lines[:-1])
From scoder at codespeak.net Wed May 17 12:43:24 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Wed, 17 May 2006 12:43:24 +0200 (CEST)
Subject: [Lxml-checkins] r27339 - lxml/branch/lxml-0.9.x
Message-ID: <20060517104324.75B0D10050@code0.codespeak.net>
Author: scoder
Date: Wed May 17 12:43:22 2006
New Revision: 27339
Modified:
lxml/branch/lxml-0.9.x/CHANGES.txt
Log:
CHANGES.txt: possible crashes in 0.9.x when passing None arguments
Modified: lxml/branch/lxml-0.9.x/CHANGES.txt
==============================================================================
--- lxml/branch/lxml-0.9.x/CHANGES.txt (original)
+++ lxml/branch/lxml-0.9.x/CHANGES.txt Wed May 17 12:43:22 2006
@@ -10,6 +10,8 @@
Bugs fixed
----------
+* Some API functions didn't handle invalid None arguments correctly
+
* Element/SubElement failed to set attribute namespaces from passed ``attrib``
dictionary
From scoder at codespeak.net Wed May 17 12:58:41 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Wed, 17 May 2006 12:58:41 +0200 (CEST)
Subject: [Lxml-checkins] r27341 - lxml/trunk/src/lxml
Message-ID: <20060517105841.1B60810064@code0.codespeak.net>
Author: scoder
Date: Wed May 17 12:58:39 2006
New Revision: 27341
Added:
lxml/trunk/src/lxml/xmlwriter.pxi
- copied unchanged from r27340, lxml/trunk/src/lxml/apihelpers.pxi
Log:
copied apihelpers.pxi to xmlwriter.pxi before split
From scoder at codespeak.net Wed May 17 13:04:39 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Wed, 17 May 2006 13:04:39 +0200 (CEST)
Subject: [Lxml-checkins] r27342 - lxml/trunk/src/lxml
Message-ID: <20060517110439.880CB10060@code0.codespeak.net>
Author: scoder
Date: Wed May 17 13:04:38 2006
New Revision: 27342
Modified:
lxml/trunk/src/lxml/apihelpers.pxi
lxml/trunk/src/lxml/etree.pyx
lxml/trunk/src/lxml/xmlwriter.pxi
Log:
moved XML output functions from apihelpers.pxi to xmlwriter.pxi
Modified: lxml/trunk/src/lxml/apihelpers.pxi
==============================================================================
--- lxml/trunk/src/lxml/apihelpers.pxi (original)
+++ lxml/trunk/src/lxml/apihelpers.pxi Wed May 17 13:04:38 2006
@@ -1,164 +1,4 @@
-# Private helper functions for input/output and API functions
-
-# XML I/O helpers
-
-cdef _tostring(_NodeBase element, encoding, int write_xml_declaration):
- "Serialize an element to an encoded string representation of its XML tree."
- cdef _Document doc
- cdef tree.xmlOutputBuffer* c_buffer
- cdef tree.xmlBuffer* c_result_buffer
- cdef tree.xmlCharEncodingHandler* enchandler
- cdef char* c_enc
- cdef char* c_version
- if element is None:
- return None
- if encoding in ('utf8', 'UTF8', 'utf-8'):
- encoding = 'UTF-8'
- doc = element._doc
- c_enc = encoding
- # it is necessary to *and* find the encoding handler *and* use
- # encoding during output
- enchandler = tree.xmlFindCharEncodingHandler(c_enc)
- c_buffer = tree.xmlAllocOutputBuffer(enchandler)
- if c_buffer is NULL:
- raise LxmlError, "Failed to create output buffer"
-
- try:
- _writeNodeToBuffer(c_buffer, doc._c_doc, element._c_node,
- doc._c_doc.version, c_enc, write_xml_declaration)
- tree.xmlOutputBufferFlush(c_buffer)
- if c_buffer.conv is not NULL:
- c_result_buffer = c_buffer.conv
- else:
- c_result_buffer = c_buffer.buffer
- result = python.PyString_FromStringAndSize(
- tree.xmlBufferContent(c_result_buffer),
- tree.xmlBufferLength(c_result_buffer))
- finally:
- tree.xmlOutputBufferClose(c_buffer)
- return result
-
-cdef _tounicode(_NodeBase element):
- "Serialize an element to the Python unicode representation of its XML tree."
- cdef _Document doc
- cdef tree.xmlOutputBuffer* c_buffer
- cdef tree.xmlBuffer* c_result_buffer
- if element is None:
- return None
- doc = element._doc
- c_buffer = tree.xmlAllocOutputBuffer(NULL)
- if c_buffer is NULL:
- raise LxmlError, "Failed to create output buffer"
- try:
- _writeNodeToBuffer(c_buffer, doc._c_doc, element._c_node,
- NULL, NULL, 0)
- tree.xmlOutputBufferFlush(c_buffer)
- if c_buffer.conv is not NULL:
- c_result_buffer = c_buffer.conv
- else:
- c_result_buffer = c_buffer.buffer
- result = python.PyUnicode_DecodeUTF8(
- tree.xmlBufferContent(c_result_buffer),
- tree.xmlBufferLength(c_result_buffer),
- 'strict')
- finally:
- tree.xmlOutputBufferClose(c_buffer)
- return result
-
-cdef void _writeNodeToBuffer(tree.xmlOutputBuffer* c_buffer,
- xmlDoc* c_doc, xmlNode* c_node,
- char* xml_version, char* encoding,
- int write_xml_declaration):
- if write_xml_declaration:
- _writeDeclarationToBuffer(c_buffer, xml_version, encoding)
-
- tree.xmlNodeDumpOutput(c_buffer, c_doc, c_node, 0, 0, encoding)
- _dumpNextNode(c_buffer, c_doc, c_node, encoding)
-
-cdef void _writeDeclarationToBuffer(tree.xmlOutputBuffer* c_buffer,
- char* version, char* encoding):
- if version is NULL:
- version = "1.0"
- tree.xmlOutputBufferWriteString(c_buffer, "\n")
-
-# output to file-like objects
-cdef class _FileWriter:
- cdef object _filelike
- cdef _ExceptionContext _exc_context
- def __init__(self, filelike, exc_context=None):
- self._filelike = filelike
- if exc_context is None:
- self._exc_context = _ExceptionContext()
- else:
- self._exc_context = exc_context
-
- cdef tree.xmlOutputBuffer* _createOutputBuffer(
- self, tree.xmlCharEncodingHandler* enchandler) except NULL:
- cdef tree.xmlOutputBuffer* c_buffer
- c_buffer = tree.xmlOutputBufferCreateIO(
- _writeFilelikeWriter, _closeFilelikeWriter,
- self, enchandler)
- if c_buffer is NULL:
- raise IOError, "Could not create I/O writer context."
- return c_buffer
-
- cdef int write(self, char* c_buffer, int len):
- try:
- if self._filelike is None:
- raise IOError, "File is already closed"
- py_buffer = python.PyString_FromStringAndSize(c_buffer, len)
- self._filelike.write(py_buffer)
- return len
- except Exception:
- self._exc_context._store_raised()
- return -1
-
- cdef int close(self):
- # we should not close the file here as we didn't open it
- self._filelike = None
- return 0
-
-cdef int _writeFilelikeWriter(void* ctxt, char* c_buffer, int len):
- return (<_FileWriter>ctxt).write(c_buffer, len)
-
-cdef int _closeFilelikeWriter(void* ctxt):
- return (<_FileWriter>ctxt).close()
-
-cdef _tofile(f, _NodeBase element, encoding, int write_declaration):
- cdef _FileWriter writer
- cdef tree.xmlOutputBuffer* c_buffer
- cdef tree.xmlCharEncodingHandler* enchandler
- cdef char* c_enc
- if encoding is None:
- c_enc = NULL
- else:
- c_enc = encoding
-
- enchandler = tree.xmlFindCharEncodingHandler(c_enc)
- if python.PyString_Check(f) or python.PyUnicode_Check(f):
- filename = _utf8(f)
- c_buffer = tree.xmlOutputBufferCreateFilename(
- _cstr(filename), enchandler, 0)
- elif hasattr(f, 'write'):
- writer = _FileWriter(f)
- c_buffer = writer._createOutputBuffer(enchandler)
- else:
- raise TypeError, "File or filename expected, got '%s'" % type(f)
-
- _writeNodeToBuffer(c_buffer,
- element._doc._c_doc, element._c_node,
- element._doc._c_doc.version, c_enc,
- write_declaration)
-
- tree.xmlOutputBufferClose(c_buffer)
- if writer is not None:
- writer._exc_context._raise_if_stored()
-
-# Private helper functions
+# Private helper functions for API functions
cdef void displayNode(xmlNode* c_node, indent):
# to help with debugging
@@ -260,27 +100,6 @@
c_attrib_node.ns.href)
return funicode(value)
-cdef _dumpToFile(f, xmlDoc* c_doc, xmlNode* c_node):
- cdef python.PyObject* o
- cdef tree.xmlOutputBuffer* c_buffer
-
- if not python.PyFile_Check(f):
- raise ValueError, "Not a file"
- o = f
- c_buffer = tree.xmlOutputBufferCreateFile(python.PyFile_AsFile(o), NULL)
- tree.xmlNodeDumpOutput(c_buffer, c_doc, c_node, 0, 0, NULL)
- # dump next node if it's a text node
- _dumpNextNode(c_buffer, c_doc, c_node, NULL)
- tree.xmlOutputBufferWriteString(c_buffer, '\n')
- tree.xmlOutputBufferFlush(c_buffer)
-
-cdef void _dumpNextNode(tree.xmlOutputBuffer* c_buffer, xmlDoc* c_doc,
- xmlNode* c_node, char* encoding):
- cdef xmlNode* c_next
- c_next = c_node.next
- if c_next is not NULL and c_next.type == tree.XML_TEXT_NODE:
- tree.xmlNodeDumpOutput(c_buffer, c_doc, c_next, 0, 0, encoding)
-
cdef object __REPLACE_XML_ENCODING
__REPLACE_XML_ENCODING = re.compile(
r'^(\s*<\?\s*xml[^>]+)\s+encoding\s*=\s*"[^"]*"\s*', re.U).sub
Modified: lxml/trunk/src/lxml/etree.pyx
==============================================================================
--- lxml/trunk/src/lxml/etree.pyx (original)
+++ lxml/trunk/src/lxml/etree.pyx Wed May 17 13:04:38 2006
@@ -1456,6 +1456,7 @@
include "nsclasses.pxi" # Namespace implementation and registry
include "docloader.pxi" # Support for custom document loaders
include "parser.pxi" # XML Parser
+include "xmlwriter.pxi" # XML output functions
include "xmlid.pxi" # XMLID and IDDict
include "extensions.pxi" # XPath/XSLT extension functions
include "xpath.pxi" # XPath evaluation
Modified: lxml/trunk/src/lxml/xmlwriter.pxi
==============================================================================
--- lxml/trunk/src/lxml/xmlwriter.pxi (original)
+++ lxml/trunk/src/lxml/xmlwriter.pxi Wed May 17 13:04:38 2006
@@ -1,6 +1,4 @@
-# Private helper functions for input/output and API functions
-
-# XML I/O helpers
+# XML serialization and output functions
cdef _tostring(_NodeBase element, encoding, int write_xml_declaration):
"Serialize an element to an encoded string representation of its XML tree."
@@ -86,6 +84,7 @@
tree.xmlOutputBufferWriteString(c_buffer, "'?>\n")
# output to file-like objects
+
cdef class _FileWriter:
cdef object _filelike
cdef _ExceptionContext _exc_context
@@ -158,107 +157,7 @@
if writer is not None:
writer._exc_context._raise_if_stored()
-# Private helper functions
-
-cdef void displayNode(xmlNode* c_node, indent):
- # to help with debugging
- cdef xmlNode* c_child
- print indent * ' ', c_node
- c_child = c_node.children
- while c_child is not NULL:
- displayNode(c_child, indent + 1)
- c_child = c_child.next
-
-cdef _Document _documentOrRaise(object input):
- cdef _Document doc
- doc = _documentOf(input)
- if doc is None:
- raise TypeError, "Invalid input object: %s" % type(input)
- else:
- return doc
-
-cdef _Document _documentOf(object input):
- # call this to get the document of a
- # _Document, _ElementTree or _NodeBase object
- if isinstance(input, _ElementTree):
- return (<_ElementTree>input)._doc
- elif isinstance(input, _NodeBase):
- return (<_NodeBase>input)._doc
- elif isinstance(input, _Document):
- return <_Document>input
- else:
- return None
-
-cdef _NodeBase _rootNodeOf(object input):
- # call this to get the root node of a
- # _Document, _ElementTree or _NodeBase object
- if isinstance(input, _ElementTree):
- return (<_ElementTree>input)._context_node
- elif isinstance(input, _NodeBase):
- return <_NodeBase>input
- elif isinstance(input, _Document):
- return (<_Document>input).getroot()
- else:
- return None
-
-cdef xmlDoc* _fakeRootDoc(xmlDoc* c_base_doc, xmlNode* c_node):
- # build a temporary document that has the given node as root node
- # note that copy and original must not be modified during its lifetime!!
- # always call _destroyFakeDoc() after use!
- cdef xmlNode* c_child
- cdef xmlNode* c_root
- cdef xmlDoc* c_doc
- c_root = tree.xmlDocGetRootElement(c_base_doc)
- if c_root == c_node:
- # already the root node
- return c_base_doc
-
- c_doc = tree.xmlCopyDoc(c_base_doc, 0) # non recursive!
- c_root = tree.xmlDocCopyNode(c_node, c_doc, 2) # non recursive!
-
- c_root.children = c_node.children
- c_root.last = c_node.last
- c_root.next = c_root.prev = c_root.parent = NULL
-
- # store original node
- c_root._private = c_node
-
- # divert parent pointers of children
- c_child = c_root.children
- while c_child is not NULL:
- c_child.parent = c_root
- c_child = c_child.next
-
- c_doc.children = c_root
- return c_doc
-
-cdef void _destroyFakeDoc(xmlDoc* c_base_doc, xmlDoc* c_doc):
- # delete a temporary document
- cdef xmlNode* c_child
- cdef xmlNode* c_parent
- cdef xmlNode* c_root
- if c_doc != c_base_doc:
- c_root = tree.xmlDocGetRootElement(c_doc)
-
- # restore parent pointers of children
- c_parent = c_root._private
- c_child = c_root.children
- while c_child is not NULL:
- c_child.parent = c_parent
- c_child = c_child.next
-
- # prevent recursive removal of children
- c_root.children = c_root.last = c_root._private = NULL
- tree.xmlFreeDoc(c_doc)
-
-cdef object _attributeValue(xmlNode* c_element, xmlNode* c_attrib_node):
- cdef char* value
- if c_attrib_node.ns is NULL or c_attrib_node.ns.href is NULL:
- value = tree.xmlGetNoNsProp(c_element, c_attrib_node.name)
- else:
- value = tree.xmlGetNsProp(c_element, c_attrib_node.name,
- c_attrib_node.ns.href)
- return funicode(value)
+# node dump functions (mainly for debug)
cdef _dumpToFile(f, xmlDoc* c_doc, xmlNode* c_node):
cdef python.PyObject* o
@@ -280,333 +179,3 @@
c_next = c_node.next
if c_next is not NULL and c_next.type == tree.XML_TEXT_NODE:
tree.xmlNodeDumpOutput(c_buffer, c_doc, c_next, 0, 0, encoding)
-
-cdef object __REPLACE_XML_ENCODING
-__REPLACE_XML_ENCODING = re.compile(
- r'^(\s*<\?\s*xml[^>]+)\s+encoding\s*=\s*"[^"]*"\s*', re.U).sub
-
-cdef object _stripEncodingDeclaration(object xml_string):
- # this is a hack to remove the XML encoding declaration from unicode
- return __REPLACE_XML_ENCODING(r'\g<1>', xml_string)
-
-cdef object _stripDeclaration(object xml_string):
- # this is a hack to remove the XML declaration when we encode to UTF-8
- xml_string = xml_string.strip()
- if xml_string[:5] == '')
- if i != -1:
- i = i + 2
- while xml_string[i:i+1] in '\n\r ':
- i = i+1
- xml_string = xml_string[i:]
- return xml_string
-
-cdef _collectText(xmlNode* c_node):
- """Collect all text nodes and return them as a unicode string.
-
- Start collecting at c_node.
-
- If there was no text to collect, return None
- """
- cdef Py_ssize_t scount
- cdef char* text
- cdef xmlNode* c_node_cur
- # check for multiple text nodes
- scount = 0
- text = NULL
- c_node_cur = c_node
- while c_node_cur is not NULL and c_node_cur.type == tree.XML_TEXT_NODE:
- if c_node_cur.content[0] != c'\0':
- text = c_node_cur.content
- scount = scount + 1
- c_node_cur = c_node_cur.next
-
- # handle two most common cases first
- if text is NULL:
- return None
- if scount == 1:
- return funicode(text)
-
- # the rest is not performance critical anymore
- result = ''
- while c_node is not NULL and c_node.type == tree.XML_TEXT_NODE:
- result = result + c_node.content
- c_node = c_node.next
- return funicode(result)
-
-cdef _removeText(xmlNode* c_node):
- """Remove all text nodes.
-
- Start removing at c_node.
- """
- cdef xmlNode* c_next
- while c_node is not NULL and c_node.type == tree.XML_TEXT_NODE:
- c_next = c_node.next
- tree.xmlUnlinkNode(c_node)
- # XXX cannot safely free in case of direct text node proxies..
- tree.xmlFreeNode(c_node)
- c_node = c_next
-
-cdef xmlNode* _findChild(xmlNode* c_node, Py_ssize_t index):
- if index < 0:
- return _findChildBackwards(c_node, -index - 1)
- else:
- return _findChildForwards(c_node, index)
-
-cdef xmlNode* _findChildForwards(xmlNode* c_node, Py_ssize_t index):
- """Return child element of c_node with index, or return NULL if not found.
- """
- cdef xmlNode* c_child
- cdef Py_ssize_t c
- c_child = c_node.children
- c = 0
- while c_child is not NULL:
- if _isElement(c_child):
- if c == index:
- return c_child
- c = c + 1
- c_child = c_child.next
- return NULL
-
-cdef xmlNode* _findChildBackwards(xmlNode* c_node, Py_ssize_t index):
- """Return child element of c_node with index, or return NULL if not found.
- Search from the end.
- """
- cdef xmlNode* c_child
- cdef Py_ssize_t c
- c_child = c_node.last
- c = 0
- while c_child is not NULL:
- if _isElement(c_child):
- if c == index:
- return c_child
- c = c + 1
- c_child = c_child.prev
- return NULL
-
-cdef xmlNode* _nextElement(xmlNode* c_node):
- """Given a node, find the next sibling that is an element.
- """
- c_node = c_node.next
- while c_node is not NULL:
- if _isElement(c_node):
- return c_node
- c_node = c_node.next
- return NULL
-
-cdef xmlNode* _previousElement(xmlNode* c_node):
- """Given a node, find the next sibling that is an element.
- """
- c_node = c_node.prev
- while c_node is not NULL:
- if _isElement(c_node):
- return c_node
- c_node = c_node.prev
- return NULL
-
-cdef xmlNode* _findDepthFirstInDescendents(xmlNode* c_node,
- char* c_href, char* c_name):
- if c_node is NULL:
- return NULL
- c_node = c_node.children
- if c_node is NULL:
- return NULL
- if not _isElement(c_node):
- c_node = _nextElement(c_node)
- return _findDepthFirstInFollowing(c_node, c_href, c_name)
-
-cdef xmlNode* _findDepthFirstInFollowingSiblings(xmlNode* c_node,
- char* c_href, char* c_name):
- if c_node is NULL:
- return NULL
- c_node = _nextElement(c_node)
- return _findDepthFirstInFollowing(c_node, c_href, c_name)
-
-cdef xmlNode* _findDepthFirstInFollowing(xmlNode* c_node,
- char* c_href, char* c_name):
- """Find the next matching node by traversing:
- 1) the node itself
- 2) its descendents
- 3) its following siblings.
- """
- cdef xmlNode* c_child
- if c_name is NULL:
- # always match
- return c_node
- while c_node is not NULL:
- if _tagMatches(c_node, c_href, c_name):
- return c_node
- if c_node.children is not NULL:
- c_child = _findDepthFirstInFollowing(c_node.children, c_href, c_name)
- if c_child is not NULL:
- return c_child
- c_node = _nextElement(c_node)
- return NULL
-
-cdef int _tagMatches(xmlNode* c_node, char* c_href, char* c_name):
- if c_name is NULL:
- # always match
- return 1
- if c_href is NULL:
- if c_node.ns is not NULL and c_node.ns.href is not NULL:
- return 0
- return cstd.strcmp(c_node.name, c_name) == 0
- elif c_node.ns is NULL or c_node.ns.href is NULL:
- return 0
- else:
- return cstd.strcmp(c_node.name, c_name) == 0 and \
- cstd.strcmp(c_node.ns.href, c_href) == 0
-
-cdef void _removeNode(xmlNode* c_node):
- """Unlink and free a node and subnodes if possible.
- """
- tree.xmlUnlinkNode(c_node)
- attemptDeallocation(c_node)
-
-cdef void _moveTail(xmlNode* c_tail, xmlNode* c_target):
- cdef xmlNode* c_next
- # tail support: look for any text nodes trailing this node and
- # move them too
- while c_tail is not NULL and c_tail.type == tree.XML_TEXT_NODE:
- c_next = c_tail.next
- tree.xmlUnlinkNode(c_tail)
- tree.xmlAddNextSibling(c_target, c_tail)
- c_target = c_tail
- c_tail = c_next
-
-cdef xmlNode* _deleteSlice(xmlNode* c_node, Py_ssize_t start, Py_ssize_t stop):
- """Delete slice, starting with c_node, start counting at start, end at stop.
- """
- cdef xmlNode* c_next
- cdef Py_ssize_t c
- if c_node is NULL:
- return NULL
- # now start deleting nodes
- c = start
- while c_node is not NULL and c < stop:
- c_next = c_node.next
- if _isElement(c_node):
- _removeText(c_node.next)
- c_next = c_node.next
- _removeNode(c_node)
- c = c + 1
- c_node = c_next
- return c_node
-
-cdef int isutf8(char* s):
- cdef char c
- c = s[0]
- while c != c'\0':
- if c & 0x80:
- return 1
- s = s + 1
- c = s[0]
- return 0
-
-cdef object funicode(char* s):
- if isutf8(s):
- return python.PyUnicode_DecodeUTF8(s, cstd.strlen(s), NULL)
- return python.PyString_FromString(s)
-
-cdef object _utf8(object s):
- if python.PyString_Check(s):
- assert not isutf8(_cstr(s)), "All strings must be Unicode or ASCII"
- return s
- elif python.PyUnicode_Check(s):
- return python.PyUnicode_AsUTF8String(s)
- else:
- raise TypeError, "Argument must be string or unicode."
-
-cdef _getNsTag(tag):
- """Given a tag, find namespace URI and tag name.
- Return None for NS uri if no namespace URI available.
- """
- cdef char* c_tag
- cdef char* c_pos
- cdef int nslen
- if isinstance(tag, QName):
- tag = (tag).text
- tag = _utf8(tag)
- c_tag = _cstr(tag)
- if c_tag[0] == c'{':
- c_pos = tree.xmlStrchr(c_tag+1, c'}')
- if c_pos is NULL:
- raise ValueError, "Invalid tag name"
- nslen = c_pos - c_tag - 1
- ns = python.PyString_FromStringAndSize(c_tag+1, nslen)
- tag = python.PyString_FromString(c_pos+1)
- else:
- ns = None
- return ns, tag
-
-cdef object _namespacedName(xmlNode* c_node):
- cdef char* href
- cdef char* name
- name = c_node.name
- if c_node.ns is NULL or c_node.ns.href is NULL:
- return funicode(name)
- else:
- href = c_node.ns.href
- s = python.PyString_FromFormat("{%s}%s", href, name)
- if isutf8(href) or isutf8(name):
- return python.PyUnicode_FromEncodedObject(s, 'UTF-8', NULL)
- else:
- return s
-
-cdef _getFilenameForFile(source):
- """Given a Python File or Gzip object, give filename back.
-
- Returns None if not a file object.
- """
- # file instances have a name attribute
- if hasattr(source, 'name'):
- return source.name
- # gzip file instances have a filename attribute
- if hasattr(source, 'filename'):
- return source.filename
- # urllib2
- if hasattr(source, 'geturl'):
- return source.geturl()
- return None
-
-cdef void changeDocumentBelow(_NodeBase node, _Document doc, int recursive):
- """For a node and all nodes below, change document.
-
- A node can change document in certain operations as an XML
- subtree can move. This updates all possible proxies in the
- tree below (including the current node). It also reconciliates
- namespaces so they're correct inside the new environment.
- """
- if recursive:
- changeDocumentBelowHelper(node._c_node, doc)
- tree.xmlReconciliateNs(doc._c_doc, node._c_node)
-
-cdef void changeDocumentBelowHelper(xmlNode* c_node, _Document doc):
- cdef ProxyRef* ref
- cdef xmlNode* c_current
- cdef xmlAttr* c_attr_current
- cdef _NodeBase proxy
-
- if c_node is NULL:
- return
- # different _c_doc
- c_node.doc = doc._c_doc
-
- if c_node._private is not NULL:
- ref = c_node._private
- while ref is not NULL:
- proxy = <_NodeBase>ref.proxy
- proxy._doc = doc
- ref = ref.next
-
- # adjust all children
- c_current = c_node.children
- while c_current is not NULL:
- changeDocumentBelowHelper(c_current, doc)
- c_current = c_current.next
-
- # adjust all attributes
- c_attr_current = c_node.properties
- while c_attr_current is not NULL:
- changeDocumentBelowHelper(c_current, doc)
- c_attr_current = c_attr_current.next
From scoder at codespeak.net Wed May 17 13:16:31 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Wed, 17 May 2006 13:16:31 +0200 (CEST)
Subject: [Lxml-checkins] r27343 - lxml/trunk/src/lxml
Message-ID: <20060517111631.2FCDE10064@code0.codespeak.net>
Author: scoder
Date: Wed May 17 13:16:30 2006
New Revision: 27343
Modified:
lxml/trunk/src/lxml/xslt.pxi
Log:
fixed potential memory leak on exception in _XSLTResultTree.__str__
Modified: lxml/trunk/src/lxml/xslt.pxi
==============================================================================
--- lxml/trunk/src/lxml/xslt.pxi (original)
+++ lxml/trunk/src/lxml/xslt.pxi Wed May 17 13:16:30 2006
@@ -395,8 +395,10 @@
if s is NULL:
return ''
# we must not use 'funicode' here as this is not always UTF-8
- result = python.PyString_FromStringAndSize(s, l)
- tree.xmlFree(s)
+ try:
+ result = python.PyString_FromStringAndSize(s, l)
+ finally:
+ tree.xmlFree(s)
return result
def __unicode__(self):
@@ -409,8 +411,10 @@
encoding = self._xslt._c_style.encoding
if encoding is NULL:
encoding = 'ascii'
- result = python.PyUnicode_Decode(s, l, encoding, 'strict')
- tree.xmlFree(s)
+ try:
+ result = python.PyUnicode_Decode(s, l, encoding, 'strict')
+ finally:
+ tree.xmlFree(s)
return _stripEncodingDeclaration(result)
cdef _xsltResultTreeFactory(_Document doc, XSLT xslt):
From faassen at codespeak.net Wed May 17 13:28:33 2006
From: faassen at codespeak.net (faassen at codespeak.net)
Date: Wed, 17 May 2006 13:28:33 +0200 (CEST)
Subject: [Lxml-checkins] r27345 - lxml/branch/lxml-0.9.x/doc
Message-ID: <20060517112833.A996310063@code0.codespeak.net>
Author: faassen
Date: Wed May 17 13:28:31 2006
New Revision: 27345
Modified:
lxml/branch/lxml-0.9.x/doc/build.txt
Log:
Fix ReST errors.
Modified: lxml/branch/lxml-0.9.x/doc/build.txt
==============================================================================
--- lxml/branch/lxml-0.9.x/doc/build.txt (original)
+++ lxml/branch/lxml-0.9.x/doc/build.txt Wed May 17 13:28:31 2006
@@ -105,25 +105,25 @@
Your directory should now have something like the following files in it::
-iconv-1.9.1.win32.zip
-libxml2-2.6.23.win32.zip
-libxslt-1.1.15.win32.zip
-lxml-0.9.2.tgz
-zlib-1.2.3.win32.zip
+ iconv-1.9.1.win32.zip
+ libxml2-2.6.23.win32.zip
+ libxslt-1.1.15.win32.zip
+ lxml-0.9.2.tgz
+ zlib-1.2.3.win32.zip
Now extract each of those files in the _same_ directory. Now you should have
something like this::
-iconv-1.9.1.win32/
-iconv-1.9.1.win32.zip
-libxml2-2.6.23.win32/
-libxml2-2.6.23.win32.zip
-libxslt-1.1.15.win32/
-libxslt-1.1.15.win32.zip
-lxml-0.9.2/
-lxml-0.9.2.tgz
-zlib-1.2.3.win32/
-zlib-1.2.3.win32.zip
+ iconv-1.9.1.win32/
+ iconv-1.9.1.win32.zip
+ libxml2-2.6.23.win32/
+ libxml2-2.6.23.win32.zip
+ libxslt-1.1.15.win32/
+ libxslt-1.1.15.win32.zip
+ lxml-0.9.2/
+ lxml-0.9.2.tgz
+ zlib-1.2.3.win32/
+ zlib-1.2.3.win32.zip
Go to the lxml-0.9.2 directory and edit the file ``setup.py``. There should be
a section that looks like this::
From faassen at codespeak.net Wed May 17 13:28:41 2006
From: faassen at codespeak.net (faassen at codespeak.net)
Date: Wed, 17 May 2006 13:28:41 +0200 (CEST)
Subject: [Lxml-checkins] r27346 - lxml/branch/lxml-0.9.x
Message-ID: <20060517112841.A58BD10063@code0.codespeak.net>
Author: faassen
Date: Wed May 17 13:28:40 2006
New Revision: 27346
Modified:
lxml/branch/lxml-0.9.x/CREDITS.txt
Log:
Update credits.
Modified: lxml/branch/lxml-0.9.x/CREDITS.txt
==============================================================================
--- lxml/branch/lxml-0.9.x/CREDITS.txt (original)
+++ lxml/branch/lxml-0.9.x/CREDITS.txt Wed May 17 13:28:40 2006
@@ -7,7 +7,8 @@
Stefan Behnel - core development work (SAX support, misc patches)
-Olivier Grisel - improved (c)ElementTree compatibility patches
+Olivier Grisel - improved (c)ElementTree compatibility patches,
+ website improvements.
Florian Wagner - help with copy.deepcopy support, bug reporting
@@ -41,6 +42,10 @@
Trent Mick - setup.py patch
+Steve Howe - Windows builds
+
+David Sankel - building statically on Windows
+
Thanks also to:
* the libxml2 project for a great XML library.
@@ -53,4 +58,3 @@
Holger Krekel for hosting it on codespeak.net
* Infrae for initiating the project.
-
From scoder at codespeak.net Wed May 17 13:44:40 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Wed, 17 May 2006 13:44:40 +0200 (CEST)
Subject: [Lxml-checkins] r27347 - in lxml/trunk: . src/lxml
Message-ID: <20060517114440.BDDE210063@code0.codespeak.net>
Author: scoder
Date: Wed May 17 13:44:39 2006
New Revision: 27347
Modified:
lxml/trunk/CHANGES.txt
lxml/trunk/src/lxml/xslt.pxi
Log:
register all libxslt extra functions (document, write, debug, output)
Modified: lxml/trunk/CHANGES.txt
==============================================================================
--- lxml/trunk/CHANGES.txt (original)
+++ lxml/trunk/CHANGES.txt Wed May 17 13:44:39 2006
@@ -44,7 +44,8 @@
* Implementation of exslt:regexp for XSLT based on the Python 're' module,
enabled by default, can be switched off with 'regexp=False' keyword argument
-* Support for exslt extensions (libexslt) and node-set function
+* Support for exslt extensions (libexslt) and libxslt extra functions
+ (node-set, document, write, output)
* Substantial speedup in XPath.evaluate()
Modified: lxml/trunk/src/lxml/xslt.pxi
==============================================================================
--- lxml/trunk/src/lxml/xslt.pxi (original)
+++ lxml/trunk/src/lxml/xslt.pxi Wed May 17 13:44:39 2006
@@ -423,18 +423,9 @@
result._xslt = xslt
return result
-# do not register all libxslt extra functions, provide only "node-set"
-# functions like "output" and "write" are a potential security risk
-#xslt.xsltRegisterAllExtras()
-xslt.xsltRegisterExtModuleFunction("node-set",
- xslt.XSLT_LIBXSLT_NAMESPACE,
- xslt.xsltFunctionNodeSet)
-xslt.xsltRegisterExtModuleFunction("node-set",
- xslt.XSLT_SAXON_NAMESPACE,
- xslt.xsltFunctionNodeSet)
-xslt.xsltRegisterExtModuleFunction("node-set",
- xslt.XSLT_XT_NAMESPACE,
- xslt.xsltFunctionNodeSet)
+# functions like "output" and "write" are a potential security risk, but we
+# rely on the user to configure XSLTAccessControl as needed
+xslt.xsltRegisterAllExtras()
# enable EXSLT support for XSLT
xslt.exsltRegisterAll()
From scoder at codespeak.net Wed May 17 13:51:54 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Wed, 17 May 2006 13:51:54 +0200 (CEST)
Subject: [Lxml-checkins] r27348 - lxml/trunk/doc
Message-ID: <20060517115154.DC51610063@code0.codespeak.net>
Author: scoder
Date: Wed May 17 13:51:53 2006
New Revision: 27348
Modified:
lxml/trunk/doc/api.txt
Log:
short note on EXSLT etc. support in XSLT
Modified: lxml/trunk/doc/api.txt
==============================================================================
--- lxml/trunk/doc/api.txt (original)
+++ lxml/trunk/doc/api.txt Wed May 17 13:51:53 2006
@@ -333,6 +333,13 @@
>>> str(result)
'\nA \n'
+By default, XSLT supports all extension functions from libxslt and libexslt as
+well as Python regular expressions through EXSLT. Note that some extensions
+enable style sheets to read and write files on the local file system. See the
+`document loader documentation`_ on how to deal with this.
+
+.. _`document loader documentation`: resolvers.html
+
RelaxNG
-------
From scoder at codespeak.net Wed May 17 14:02:11 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Wed, 17 May 2006 14:02:11 +0200 (CEST)
Subject: [Lxml-checkins] r27349 - lxml/trunk/doc
Message-ID: <20060517120211.B2DF410063@code0.codespeak.net>
Author: scoder
Date: Wed May 17 14:02:08 2006
New Revision: 27349
Modified:
lxml/trunk/doc/main.txt
Log:
typo
Modified: lxml/trunk/doc/main.txt
==============================================================================
--- lxml/trunk/doc/main.txt (original)
+++ lxml/trunk/doc/main.txt Wed May 17 14:02:08 2006
@@ -89,7 +89,7 @@
simple way to write arbitrary XML driven APIs on top of lxml.
lxml also offers a `SAX compliant API`_, that works with the SAX support
-in the standar dlibrary.
+in the standard library.
.. _`ElementTree API`: http://effbot.org/zone/element-index.htm
.. _`ElementTree compatibility overview`: compatibility.html
From faassen at codespeak.net Wed May 17 14:17:03 2006
From: faassen at codespeak.net (faassen at codespeak.net)
Date: Wed, 17 May 2006 14:17:03 +0200 (CEST)
Subject: [Lxml-checkins] r27351 - lxml/branch/lxml-0.9.x/doc
Message-ID: <20060517121703.5B31110063@code0.codespeak.net>
Author: faassen
Date: Wed May 17 14:17:01 2006
New Revision: 27351
Modified:
lxml/branch/lxml-0.9.x/doc/main.txt
Log:
Fix typo.
Modified: lxml/branch/lxml-0.9.x/doc/main.txt
==============================================================================
--- lxml/branch/lxml-0.9.x/doc/main.txt (original)
+++ lxml/branch/lxml-0.9.x/doc/main.txt Wed May 17 14:17:01 2006
@@ -89,7 +89,7 @@
simple way to write arbitrary XML driven APIs on top of lxml.
lxml also offers a `SAX compliant API`_, that works with the SAX support
-in the standar dlibrary.
+in the standard library.
.. _`ElementTree API`: http://effbot.org/zone/element-index.htm
.. _`ElementTree compatibility overview`: compatibility.html
From faassen at codespeak.net Wed May 17 14:17:14 2006
From: faassen at codespeak.net (faassen at codespeak.net)
Date: Wed, 17 May 2006 14:17:14 +0200 (CEST)
Subject: [Lxml-checkins] r27352 - lxml/www
Message-ID: <20060517121714.2E95010063@code0.codespeak.net>
Author: faassen
Date: Wed May 17 14:17:13 2006
New Revision: 27352
Modified:
lxml/www/publish.py
Log:
Add another file to produce.
Modified: lxml/www/publish.py
==============================================================================
--- lxml/www/publish.py (original)
+++ lxml/www/publish.py Wed May 17 14:17:13 2006
@@ -8,7 +8,8 @@
shutil.copy(stylesheet_url, dirname)
for name in ['main.txt', 'intro.txt', 'api.txt', 'compatibility.txt',
- 'extensions.txt', 'namespace_extensions.txt', 'sax.txt']:
+ 'extensions.txt', 'namespace_extensions.txt', 'sax.txt',
+ 'build.txt']:
path = os.path.join(lxml_path, 'doc', name)
outname = os.path.splitext(name)[0] + '.html'
outpath = os.path.join(dirname, outname)
From scoder at codespeak.net Wed May 17 14:25:08 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Wed, 17 May 2006 14:25:08 +0200 (CEST)
Subject: [Lxml-checkins] r27353 - lxml/trunk
Message-ID: <20060517122508.28BC110063@code0.codespeak.net>
Author: scoder
Date: Wed May 17 14:25:06 2006
New Revision: 27353
Modified:
lxml/trunk/MANIFEST.in
Log:
include generated .html pages in source tgz
Modified: lxml/trunk/MANIFEST.in
==============================================================================
--- lxml/trunk/MANIFEST.in (original)
+++ lxml/trunk/MANIFEST.in Wed May 17 14:25:06 2006
@@ -1,5 +1,5 @@
include setup.py MANIFEST.in *.txt
recursive-include src *.pyx *.pxd *.pxi *.py etree.c etree.h
recursive-include src/lxml/tests *.rng *.xslt *.xml
-recursive-include doc *.txt *.xml *.mgp
+recursive-include doc *.txt *.html *.xml *.mgp
exclude doc/pyrex.txt
From scoder at codespeak.net Wed May 17 14:25:22 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Wed, 17 May 2006 14:25:22 +0200 (CEST)
Subject: [Lxml-checkins] r27354 - lxml/trunk/doc
Message-ID: <20060517122522.74C5C10063@code0.codespeak.net>
Author: scoder
Date: Wed May 17 14:25:21 2006
New Revision: 27354
Modified:
lxml/trunk/doc/api.txt
lxml/trunk/doc/compatibility.txt
lxml/trunk/doc/main.txt
Log:
doc re-reads and fixes
Modified: lxml/trunk/doc/api.txt
==============================================================================
--- lxml/trunk/doc/api.txt (original)
+++ lxml/trunk/doc/api.txt Wed May 17 14:25:21 2006
@@ -9,10 +9,11 @@
----------
lxml.etree tries to follow the `ElementTree API`_ wherever it can. There are
-however some incompatibilities (see compatibility.txt). The extensions are
+however some incompatibilities (see `compatibility`_). The extensions are
documented here.
.. _`ElementTree API`: http://effbot.org/zone/element-index.htm
+.. _`compatibility`: compatibility.html
If you need to know which version of lxml is installed, you can access the
``lxml.etree.LXML_VERSION`` attribute to retrieve a version tuple. Note,
@@ -338,7 +339,7 @@
enable style sheets to read and write files on the local file system. See the
`document loader documentation`_ on how to deal with this.
-.. _`resolver documentation`: resolvers.html
+.. _`document loader documentation`: resolvers.html
RelaxNG
Modified: lxml/trunk/doc/compatibility.txt
==============================================================================
--- lxml/trunk/doc/compatibility.txt (original)
+++ lxml/trunk/doc/compatibility.txt Wed May 17 14:25:21 2006
@@ -36,17 +36,17 @@
In most parts of the API, ElementTree uses plain strings and unicode strings
as what they are. This includes Element.text, Element.tail and many other
places. However, the ElementTree parsers assume by default that any string
- (`str` or `unicode`) contains ASCII data and raise an exception if strings
- do not match the expected encoding.
+ (`str` or `unicode`) contains ASCII data. They raise an exception if
+ strings do not match the expected encoding.
etree has the same idea about plain strings (`str`) as ElementTree. For
unicode strings, however, etree assumes throughout the API that they are
Python unicode encoded strings rather than byte data. This includes the
parsers. It is therefore perfectly correct to pass XML unicode data into
the etree parsers in form of Python unicode strings. It is an error, on the
- other hand, if unicode strings specify an encoding in their XML declaration.
- Note also that Python unicode strings are platform specific. Such an
- encoding specifier would not be portable.
+ other hand, if unicode strings specify an encoding in their XML declaration,
+ as this conflicts with the characteristic encoding of Python unicode
+ strings.
* ElementTree allows you to place an Element in two different trees at the
same time. Thus, this::
@@ -114,7 +114,7 @@
like ElementTree's. copy.copy() however does *not* create a shallow copy
where elements are shared between trees, as this makes no sense in the
context of libxml2 trees. Note that lxml can deep-copy trees considerably
- faster than than ElementTree.
+ faster than ElementTree.
* etree allows navigation to the parent of a node by the ``getparent()``
method. This is not possible in ElementTree as the underlying tree model
Modified: lxml/trunk/doc/main.txt
==============================================================================
--- lxml/trunk/doc/main.txt (original)
+++ lxml/trunk/doc/main.txt Wed May 17 14:25:21 2006
@@ -99,7 +99,7 @@
.. _`Relax NG`: http://www.relaxng.org/
.. _`XML Schema`: http://www.w3.org/XML/Schema
.. _`XSLT`: http://www.w3.org/TR/xslt
-.. _`c14n`: http://www.w3.org/TR/2001/REC-xml-c14n-20010315
+.. _`c14n`: http://www.w3.org/TR/xml-c14n
.. _`implementing namespaces`: namespace_extensions.html
.. _`SAX compliant API`: sax.html
From scoder at codespeak.net Wed May 17 14:35:44 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Wed, 17 May 2006 14:35:44 +0200 (CEST)
Subject: [Lxml-checkins] r27356 - lxml/trunk/doc
Message-ID: <20060517123544.6FDA010063@code0.codespeak.net>
Author: scoder
Date: Wed May 17 14:35:43 2006
New Revision: 27356
Modified:
lxml/trunk/doc/build.txt
Log:
doc fixes
Modified: lxml/trunk/doc/build.txt
==============================================================================
--- lxml/trunk/doc/build.txt (original)
+++ lxml/trunk/doc/build.txt Wed May 17 14:35:43 2006
@@ -103,7 +103,8 @@
libxml2, libxslt, and zlib libraries are all available from xmlsoft.org. The
place to go on the ftp site is ftp://xmlsoft.org/libxml2/win32.
-Your directory should now have something like the following files in it::
+Your directory should now have the following files in it (although possibly
+different versions)::
iconv-1.9.1.win32.zip
libxml2-2.6.23.win32.zip
@@ -111,7 +112,7 @@
lxml-0.9.2.tgz
zlib-1.2.3.win32.zip
-Now extract each of those files in the _same_ directory. Now you should have
+Now extract each of those files in the *same* directory. Now you should have
something like this::
iconv-1.9.1.win32/
From scoder at codespeak.net Wed May 17 14:39:35 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Wed, 17 May 2006 14:39:35 +0200 (CEST)
Subject: [Lxml-checkins] r27358 - in lxml/trunk: . doc
Message-ID: <20060517123935.BA79710068@code0.codespeak.net>
Author: scoder
Date: Wed May 17 14:39:34 2006
New Revision: 27358
Modified:
lxml/trunk/CREDITS.txt
lxml/trunk/doc/build.txt
Log:
merged in doc updates from branch
Modified: lxml/trunk/CREDITS.txt
==============================================================================
--- lxml/trunk/CREDITS.txt (original)
+++ lxml/trunk/CREDITS.txt Wed May 17 14:39:34 2006
@@ -7,7 +7,8 @@
Stefan Behnel - core development work (SAX support, misc patches)
-Olivier Grisel - improved (c)ElementTree compatibility patches
+Olivier Grisel - improved (c)ElementTree compatibility patches,
+ website improvements.
Florian Wagner - help with copy.deepcopy support, bug reporting
@@ -41,6 +42,10 @@
Trent Mick - setup.py patch
+Steve Howe - Windows builds
+
+David Sankel - building statically on Windows
+
Thanks also to:
* the libxml2 project for a great XML library.
@@ -53,4 +58,3 @@
Holger Krekel for hosting it on codespeak.net
* Infrae for initiating the project.
-
Modified: lxml/trunk/doc/build.txt
==============================================================================
--- lxml/trunk/doc/build.txt (original)
+++ lxml/trunk/doc/build.txt Wed May 17 14:39:34 2006
@@ -106,25 +106,25 @@
Your directory should now have the following files in it (although possibly
different versions)::
-iconv-1.9.1.win32.zip
-libxml2-2.6.23.win32.zip
-libxslt-1.1.15.win32.zip
-lxml-0.9.2.tgz
-zlib-1.2.3.win32.zip
+ iconv-1.9.1.win32.zip
+ libxml2-2.6.23.win32.zip
+ libxslt-1.1.15.win32.zip
+ lxml-0.9.2.tgz
+ zlib-1.2.3.win32.zip
Now extract each of those files in the *same* directory. Now you should have
something like this::
-iconv-1.9.1.win32/
-iconv-1.9.1.win32.zip
-libxml2-2.6.23.win32/
-libxml2-2.6.23.win32.zip
-libxslt-1.1.15.win32/
-libxslt-1.1.15.win32.zip
-lxml-0.9.2/
-lxml-0.9.2.tgz
-zlib-1.2.3.win32/
-zlib-1.2.3.win32.zip
+ iconv-1.9.1.win32/
+ iconv-1.9.1.win32.zip
+ libxml2-2.6.23.win32/
+ libxml2-2.6.23.win32.zip
+ libxslt-1.1.15.win32/
+ libxslt-1.1.15.win32.zip
+ lxml-0.9.2/
+ lxml-0.9.2.tgz
+ zlib-1.2.3.win32/
+ zlib-1.2.3.win32.zip
Go to the lxml-0.9.2 directory and edit the file ``setup.py``. There should be
a section that looks like this::
From scoder at codespeak.net Wed May 17 14:43:06 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Wed, 17 May 2006 14:43:06 +0200 (CEST)
Subject: [Lxml-checkins] r27359 - lxml/trunk/doc
Message-ID: <20060517124306.D0CFB1006B@code0.codespeak.net>
Author: scoder
Date: Wed May 17 14:43:05 2006
New Revision: 27359
Modified:
lxml/trunk/doc/build.txt
Log:
doc fixes
Modified: lxml/trunk/doc/build.txt
==============================================================================
--- lxml/trunk/doc/build.txt (original)
+++ lxml/trunk/doc/build.txt Wed May 17 14:43:05 2006
@@ -112,7 +112,7 @@
lxml-0.9.2.tgz
zlib-1.2.3.win32.zip
-Now extract each of those files in the *same* directory. Now you should have
+Now extract each of those files in the *same* directory. This should give you
something like this::
iconv-1.9.1.win32/
From scoder at codespeak.net Wed May 17 15:01:39 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Wed, 17 May 2006 15:01:39 +0200 (CEST)
Subject: [Lxml-checkins] r27362 - lxml/trunk
Message-ID: <20060517130139.588A510060@code0.codespeak.net>
Author: scoder
Date: Wed May 17 15:01:37 2006
New Revision: 27362
Modified:
lxml/trunk/TODO.txt
Log:
cleanup in TODO.txt
Modified: lxml/trunk/TODO.txt
==============================================================================
--- lxml/trunk/TODO.txt (original)
+++ lxml/trunk/TODO.txt Wed May 17 15:01:37 2006
@@ -6,24 +6,10 @@
* potential threading issues in XPath extension functions?
-* Python extension functions, threading issues.
-
-* Improved Relax NG error reporting. Right now we only get valid or invalid.
-
-* Improved XML Schema error reporting. Right now we only get valid or invalid.
-
-* Improved error handling in general; test structured exceptions in more
- detail.
-
* See whether XInclude support can mimic ElementTree's API.
* Test XML entities, also in an ElementTree context.
-* Support for loading files from other places than filesystem, for
- instance xslt:include, xslt:import, XInclude, Relax NG import.
-
-* More tests for error handling.
-
In general
----------
@@ -31,43 +17,24 @@
* will namespaces nodes of unknown namespaces be added (and never freed?)
-* Various (c)ElementTree builders and parser APIs. Are they needed?
-
* iterparse support would be nice.
-* memory errors and memory leaks when returning nodes from XPath
- extension functions.
-
Top level
---------
-* parse() support for custom parsers. (?)
-
* ProcessingInstruction
-* XMLID
-
ElementInterface
-----------------
-* improve getiterator() implementation to use Python-level iterators
-
ElementTree
-----------
* _setroot(), even though this is not strictly a public method.
-* parse() - this seems hard to implement sanely so this may be an
- incompatibility.
-
-* improve write() and write_c14n() support to use file pointers
- directly where possible, instead of going through memory.
-
QName
-----
-Not yet implemented.
-
Features
--------
From scoder at codespeak.net Thu May 18 08:38:23 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Thu, 18 May 2006 08:38:23 +0200 (CEST)
Subject: [Lxml-checkins] r27383 - in lxml/trunk: . src/lxml src/lxml/tests
Message-ID: <20060518063823.C91BA10063@code0.codespeak.net>
Author: scoder
Date: Thu May 18 08:38:20 2006
New Revision: 27383
Modified:
lxml/trunk/CHANGES.txt
lxml/trunk/src/lxml/etree.pyx
lxml/trunk/src/lxml/python.pxd
lxml/trunk/src/lxml/tests/test_etree.py
lxml/trunk/src/lxml/tree.pxd
lxml/trunk/src/lxml/xmlwriter.pxi
Log:
support XML pretty printing in output functions, major cleanup in xmlwriter.pxi
Modified: lxml/trunk/CHANGES.txt
==============================================================================
--- lxml/trunk/CHANGES.txt (original)
+++ lxml/trunk/CHANGES.txt Thu May 18 08:38:20 2006
@@ -7,6 +7,8 @@
Features added
--------------
+* Formatted output via ``pretty_print`` keyword to serialization functions
+
* XSLT can block access to file system and network via ``XSLTAccessControl``
* ElementTree.write() no longer serializes in memory (reduced memory
Modified: lxml/trunk/src/lxml/etree.pyx
==============================================================================
--- lxml/trunk/src/lxml/etree.pyx (original)
+++ lxml/trunk/src/lxml/etree.pyx Thu May 18 08:38:20 2006
@@ -348,7 +348,7 @@
def __get__(self):
return DocInfo(self._doc)
- def write(self, file, encoding='us-ascii'):
+ def write(self, file, encoding='us-ascii', pretty_print=False):
if encoding in ('utf8', 'UTF8', 'utf-8'):
encoding = 'UTF-8'
if encoding == 'UTF-8' or encoding == 'us-ascii':
@@ -356,7 +356,8 @@
write_declaration = 0
else:
write_declaration = 1
- _tofile(file, self._context_node, encoding, write_declaration)
+ _tofilelike(file, self._context_node, encoding,
+ write_declaration, bool(pretty_print))
def getiterator(self, tag=None):
root = self.getroot()
@@ -1405,12 +1406,15 @@
return isinstance(element, _Element)
def dump(_NodeBase elem not None):
- _dumpToFile(sys.stdout, elem._doc._c_doc, elem._c_node)
+ _dumpToFile(sys.stdout, elem._c_node)
-def tostring(element_or_tree, encoding='us-ascii', xml_declaration=None):
+def tostring(element_or_tree, encoding='us-ascii',
+ xml_declaration=None, pretty_print=False):
"Serialize an element to an encoded string representation of its XML tree."
cdef int write_declaration
+ cdef int c_pretty_print
encoding = str(encoding)
+ c_pretty_print = bool(pretty_print)
if xml_declaration is None:
# by default, write an XML declaration only for non-standard encodings
write_declaration = (encoding != 'us-ascii')
@@ -1419,24 +1423,27 @@
if isinstance(element_or_tree, _NodeBase):
return _tostring(<_NodeBase>element_or_tree,
- encoding, write_declaration)
+ encoding, write_declaration, c_pretty_print)
elif isinstance(element_or_tree, _ElementTree):
return _tostring((<_ElementTree>element_or_tree)._context_node,
- encoding, write_declaration)
+ encoding, write_declaration, c_pretty_print)
else:
raise TypeError, "Type '%s' cannot be serialized." % type(element_or_tree)
-def tounicode(element_or_tree):
+def tounicode(element_or_tree, pretty_print=False):
"""Serialize an element to the Python unicode representation of its XML
tree.
Note that the result does not carry an XML encoding declaration and is
therefore not necessarily suited for serialization without further
treatment."""
+ cdef int c_pretty_print
+ c_pretty_print = bool(pretty_print)
if isinstance(element_or_tree, _NodeBase):
- return _tounicode(<_NodeBase>element_or_tree)
+ return _tounicode(<_NodeBase>element_or_tree, c_pretty_print)
elif isinstance(element_or_tree, _ElementTree):
- return _tounicode((<_ElementTree>element_or_tree)._context_node)
+ return _tounicode((<_ElementTree>element_or_tree)._context_node,
+ c_pretty_print)
else:
raise TypeError, "Type '%s' cannot be serialized." % type(element_or_tree)
Modified: lxml/trunk/src/lxml/python.pxd
==============================================================================
--- lxml/trunk/src/lxml/python.pxd (original)
+++ lxml/trunk/src/lxml/python.pxd Thu May 18 08:38:20 2006
@@ -6,7 +6,7 @@
ctypedef int Py_ssize_t
cdef int INT_MAX
- cdef FILE* PyFile_AsFile(PyObject* p)
+ cdef FILE* PyFile_AsFile(object p)
cdef int PyFile_Check(object p)
cdef object PyFile_Name(object p)
Modified: lxml/trunk/src/lxml/tests/test_etree.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_etree.py (original)
+++ lxml/trunk/src/lxml/tests/test_etree.py Thu May 18 08:38:20 2006
@@ -449,6 +449,24 @@
tostring = self.etree.tostring
self.assertRaises(TypeError, self.etree.tostring, None)
+ def test_tostring_pretty(self):
+ tostring = self.etree.tostring
+ Element = self.etree.Element
+ SubElement = self.etree.SubElement
+
+ a = Element('a')
+ b = SubElement(a, 'b')
+ c = SubElement(a, 'c')
+
+ result = tostring(a)
+ self.assertEquals(result, "<a><b/><c/></a>")
+
+ result = tostring(a, pretty_print=False)
+ self.assertEquals(result, "<a><b/><c/></a>")
+
+ result = tostring(a, pretty_print=True)
+ self.assertEquals(result, "<a>\n  <b/>\n  <c/>\n</a>")
+
def test_tounicode(self):
tounicode = self.etree.tounicode
Element = self.etree.Element
@@ -497,6 +515,24 @@
self.assert_(tounicode(b) == ' Foo' or
tounicode(b) == ' Foo')
+ def test_tounicode_pretty(self):
+ tounicode = self.etree.tounicode
+ Element = self.etree.Element
+ SubElement = self.etree.SubElement
+
+ a = Element('a')
+ b = SubElement(a, 'b')
+ c = SubElement(a, 'c')
+
+ result = tounicode(a)
+ self.assertEquals(result, "<a><b/><c/></a>")
+
+ result = tounicode(a, pretty_print=False)
+ self.assertEquals(result, "<a><b/><c/></a>")
+
+ result = tounicode(a, pretty_print=True)
+ self.assertEquals(result, "<a>\n  <b/>\n  <c/>\n</a>")
+
def _writeElement(self, element, encoding='us-ascii'):
"""Write out element for comparison.
"""
Modified: lxml/trunk/src/lxml/tree.pxd
==============================================================================
--- lxml/trunk/src/lxml/tree.pxd (original)
+++ lxml/trunk/src/lxml/tree.pxd Thu May 18 08:38:20 2006
@@ -202,6 +202,7 @@
cdef xmlBuffer* xmlBufferCreate()
cdef char* xmlBufferContent(xmlBuffer* buf)
cdef int xmlBufferLength(xmlBuffer* buf)
+ cdef int xmlKeepBlanksDefault(int val)
cdef extern from "libxml/xmlIO.h":
cdef int xmlOutputBufferWriteString(xmlOutputBuffer* out, char* str)
Modified: lxml/trunk/src/lxml/xmlwriter.pxi
==============================================================================
--- lxml/trunk/src/lxml/xmlwriter.pxi (original)
+++ lxml/trunk/src/lxml/xmlwriter.pxi Thu May 18 08:38:20 2006
@@ -1,8 +1,10 @@
# XML serialization and output functions
-cdef _tostring(_NodeBase element, encoding, int write_xml_declaration):
+tree.xmlKeepBlanksDefault(0)
+
+cdef _tostring(_NodeBase element, encoding,
+ int write_xml_declaration, int pretty_print):
"Serialize an element to an encoded string representation of its XML tree."
- cdef _Document doc
cdef tree.xmlOutputBuffer* c_buffer
cdef tree.xmlBuffer* c_result_buffer
cdef tree.xmlCharEncodingHandler* enchandler
@@ -12,7 +14,6 @@
return None
if encoding in ('utf8', 'UTF8', 'utf-8'):
encoding = 'UTF-8'
- doc = element._doc
c_enc = encoding
# it is necessary to *and* find the encoding handler *and* use
# encoding during output
@@ -22,8 +23,8 @@
raise LxmlError, "Failed to create output buffer"
try:
- _writeNodeToBuffer(c_buffer, doc._c_doc, element._c_node,
- doc._c_doc.version, c_enc, write_xml_declaration)
+ _writeNodeToBuffer(c_buffer, element._c_node, c_enc,
+ write_xml_declaration, pretty_print)
tree.xmlOutputBufferFlush(c_buffer)
if c_buffer.conv is not NULL:
c_result_buffer = c_buffer.conv
@@ -36,20 +37,17 @@
tree.xmlOutputBufferClose(c_buffer)
return result
-cdef _tounicode(_NodeBase element):
+cdef _tounicode(_NodeBase element, int pretty_print):
"Serialize an element to the Python unicode representation of its XML tree."
- cdef _Document doc
cdef tree.xmlOutputBuffer* c_buffer
cdef tree.xmlBuffer* c_result_buffer
if element is None:
return None
- doc = element._doc
c_buffer = tree.xmlAllocOutputBuffer(NULL)
if c_buffer is NULL:
raise LxmlError, "Failed to create output buffer"
try:
- _writeNodeToBuffer(c_buffer, doc._c_doc, element._c_node,
- NULL, NULL, 0)
+ _writeNodeToBuffer(c_buffer, element._c_node, NULL, 0, pretty_print)
tree.xmlOutputBufferFlush(c_buffer)
if c_buffer.conv is not NULL:
c_result_buffer = c_buffer.conv
@@ -64,14 +62,15 @@
return result
cdef void _writeNodeToBuffer(tree.xmlOutputBuffer* c_buffer,
- xmlDoc* c_doc, xmlNode* c_node,
- char* xml_version, char* encoding,
- int write_xml_declaration):
+ xmlNode* c_node, char* encoding,
+ int write_xml_declaration, int pretty_print):
+ cdef xmlDoc* c_doc
+ c_doc = c_node.doc
if write_xml_declaration:
- _writeDeclarationToBuffer(c_buffer, xml_version, encoding)
+ _writeDeclarationToBuffer(c_buffer, c_doc.version, encoding)
- tree.xmlNodeDumpOutput(c_buffer, c_doc, c_node, 0, 0, encoding)
- _dumpNextNode(c_buffer, c_doc, c_node, encoding)
+ tree.xmlNodeDumpOutput(c_buffer, c_doc, c_node, 0, pretty_print, encoding)
+ _writeTail(c_buffer, c_node, encoding, pretty_print)
cdef void _writeDeclarationToBuffer(tree.xmlOutputBuffer* c_buffer,
char* version, char* encoding):
@@ -83,6 +82,16 @@
tree.xmlOutputBufferWriteString(c_buffer, encoding)
tree.xmlOutputBufferWriteString(c_buffer, "'?>\n")
+
+cdef void _writeTail(tree.xmlOutputBuffer* c_buffer, xmlNode* c_node,
+ char* encoding, int pretty_print):
+ "Write the element tail."
+ c_node = c_node.next
+ while c_node is not NULL and c_node.type == tree.XML_TEXT_NODE:
+ tree.xmlNodeDumpOutput(c_buffer, c_node.doc, c_node, 0,
+ pretty_print, encoding)
+ c_node = c_node.next
+
# output to file-like objects
cdef class _FileWriter:
@@ -127,7 +136,8 @@
cdef int _closeFilelikeWriter(void* ctxt):
return (<_FileWriter>ctxt).close()
-cdef _tofile(f, _NodeBase element, encoding, int write_declaration):
+cdef _tofilelike(f, _NodeBase element, encoding,
+ int write_xml_declaration, int pretty_print):
cdef _FileWriter writer
cdef tree.xmlOutputBuffer* c_buffer
cdef tree.xmlCharEncodingHandler* enchandler
@@ -148,34 +158,21 @@
else:
raise TypeError, "File or filename expected, got '%s'" % type(f)
- _writeNodeToBuffer(c_buffer,
- element._doc._c_doc, element._c_node,
- element._doc._c_doc.version, c_enc,
- write_declaration)
+ _writeNodeToBuffer(c_buffer, element._c_node, c_enc,
+ write_xml_declaration, pretty_print)
tree.xmlOutputBufferClose(c_buffer)
if writer is not None:
writer._exc_context._raise_if_stored()
-# node dump functions (mainly for debug)
+# dump node to file (mainly for debug)
-cdef _dumpToFile(f, xmlDoc* c_doc, xmlNode* c_node):
- cdef python.PyObject* o
+cdef _dumpToFile(f, xmlNode* c_node):
cdef tree.xmlOutputBuffer* c_buffer
-
if not python.PyFile_Check(f):
raise ValueError, "Not a file"
- o = f
- c_buffer = tree.xmlOutputBufferCreateFile(python.PyFile_AsFile(o), NULL)
- tree.xmlNodeDumpOutput(c_buffer, c_doc, c_node, 0, 0, NULL)
- # dump next node if it's a text node
- _dumpNextNode(c_buffer, c_doc, c_node, NULL)
+ c_buffer = tree.xmlOutputBufferCreateFile(python.PyFile_AsFile(f), NULL)
+ tree.xmlNodeDumpOutput(c_buffer, c_node.doc, c_node, 0, 0, NULL)
+ _writeTail(c_buffer, c_node, NULL, 0)
tree.xmlOutputBufferWriteString(c_buffer, '\n')
tree.xmlOutputBufferFlush(c_buffer)
-
-cdef void _dumpNextNode(tree.xmlOutputBuffer* c_buffer, xmlDoc* c_doc,
- xmlNode* c_node, char* encoding):
- cdef xmlNode* c_next
- c_next = c_node.next
- if c_next is not NULL and c_next.type == tree.XML_TEXT_NODE:
- tree.xmlNodeDumpOutput(c_buffer, c_doc, c_next, 0, 0, encoding)
From scoder at codespeak.net Thu May 18 08:55:39 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Thu, 18 May 2006 08:55:39 +0200 (CEST)
Subject: [Lxml-checkins] r27384 - lxml/trunk/src/lxml
Message-ID: <20060518065539.6A27910063@code0.codespeak.net>
Author: scoder
Date: Thu May 18 08:55:38 2006
New Revision: 27384
Modified:
lxml/trunk/src/lxml/xmlwriter.pxi
Log:
prettification
Modified: lxml/trunk/src/lxml/xmlwriter.pxi
==============================================================================
--- lxml/trunk/src/lxml/xmlwriter.pxi (original)
+++ lxml/trunk/src/lxml/xmlwriter.pxi Thu May 18 08:55:38 2006
@@ -137,7 +137,7 @@
return (<_FileWriter>ctxt).close()
cdef _tofilelike(f, _NodeBase element, encoding,
- int write_xml_declaration, int pretty_print):
+ int write_xml_declaration, int pretty_print):
cdef _FileWriter writer
cdef tree.xmlOutputBuffer* c_buffer
cdef tree.xmlCharEncodingHandler* enchandler
From scoder at codespeak.net Thu May 18 08:59:08 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Thu, 18 May 2006 08:59:08 +0200 (CEST)
Subject: [Lxml-checkins] r27385 - in lxml/trunk: . src/lxml
Message-ID: <20060518065908.C0F0110063@code0.codespeak.net>
Author: scoder
Date: Thu May 18 08:59:07 2006
New Revision: 27385
Modified:
lxml/trunk/CHANGES.txt
lxml/trunk/src/lxml/etree.pyx
Log:
fix: prevent ElementTree methods from treating empty root node (raise AssertionError)
Modified: lxml/trunk/CHANGES.txt
==============================================================================
--- lxml/trunk/CHANGES.txt (original)
+++ lxml/trunk/CHANGES.txt Thu May 18 08:59:07 2006
@@ -59,6 +59,9 @@
Bugs fixed
----------
+* Some ElementTree methods could crash if the root node was not initialized
+ (neither file nor element passed to the constructor)
+
* Element/SubElement failed to set attribute namespaces from passed ``attrib``
dictionary
Modified: lxml/trunk/src/lxml/etree.pyx
==============================================================================
--- lxml/trunk/src/lxml/etree.pyx (original)
+++ lxml/trunk/src/lxml/etree.pyx Thu May 18 08:59:07 2006
@@ -330,6 +330,11 @@
cdef _Document _doc
cdef _NodeBase _context_node
+ # we have to take care here: the document may not have a root node!
+ cdef _assertHasRoot(self):
+ assert self._context_node is not None, \
+ "ElementTree not initialized, missing root"
+
def parse(self, source, parser=None):
"""Updates self with the content of source and returns its root
"""
@@ -349,6 +354,7 @@
return DocInfo(self._doc)
def write(self, file, encoding='us-ascii', pretty_print=False):
+ self._assertHasRoot()
if encoding in ('utf8', 'UTF8', 'utf-8'):
encoding = 'UTF-8'
if encoding == 'UTF-8' or encoding == 'us-ascii':
@@ -366,22 +372,22 @@
return root.getiterator(tag)
def find(self, path):
+ self._assertHasRoot()
root = self.getroot()
- assert root is not None
if path[:1] == "/":
path = "." + path
return root.find(path)
def findtext(self, path, default=None):
+ self._assertHasRoot()
root = self.getroot()
- assert root is not None
if path[:1] == "/":
path = "." + path
return root.findtext(path, default)
def findall(self, path):
+ self._assertHasRoot()
root = self.getroot()
- assert root is not None
if path[:1] == "/":
path = "." + path
return root.findall(path)
@@ -402,6 +408,7 @@
against the same document, it is more efficient to use
XPathEvaluator directly.
"""
+ self._assertHasRoot()
evaluator = XPathElementEvaluator(self._context_node, namespaces)
return evaluator.evaluate(_path, **_variables)
@@ -417,6 +424,7 @@
multiple documents, it is more efficient to use the XSLT
class directly.
"""
+ self._assertHasRoot()
style = XSLT(_xslt, extensions)
return style(self, **_kw)
@@ -432,6 +440,7 @@
multiple documents, it is more efficient to use the RelaxNG
class directly.
"""
+ self._assertHasRoot()
schema = RelaxNG(relaxng)
return schema.validate(self)
@@ -447,6 +456,7 @@
multiple documents, it is more efficient to use the XMLSchema
class directly.
"""
+ self._assertHasRoot()
schema = XMLSchema(xmlschema)
return schema.validate(self)
@@ -460,6 +470,7 @@
# at all. The XInclude nodes appear to be still being in the same
# parent and same document, but they must not be connected to the
# tree..
+ self._assertHasRoot()
result = xinclude.xmlXIncludeProcessTree(self._context_node._c_node)
if result == -1:
raise XIncludeError, "XInclude processing failed"
@@ -471,6 +482,7 @@
cdef xmlDoc* c_doc
cdef char* data
cdef int bytes
+ self._assertHasRoot()
c_base_doc = self._doc._c_doc
c_doc = _fakeRootDoc(c_base_doc, self._context_node._c_node)
From scoder at codespeak.net Thu May 18 10:37:20 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Thu, 18 May 2006 10:37:20 +0200 (CEST)
Subject: [Lxml-checkins] r27388 - in lxml/trunk: . doc
Message-ID: <20060518083720.AE7FF1006B@code0.codespeak.net>
Author: scoder
Date: Thu May 18 10:37:19 2006
New Revision: 27388
Added:
lxml/trunk/doc/pubkey.asc
Modified:
lxml/trunk/MANIFEST.in
Log:
added public key used for package signing
Modified: lxml/trunk/MANIFEST.in
==============================================================================
--- lxml/trunk/MANIFEST.in (original)
+++ lxml/trunk/MANIFEST.in Thu May 18 10:37:19 2006
@@ -1,5 +1,5 @@
include setup.py MANIFEST.in *.txt
recursive-include src *.pyx *.pxd *.pxi *.py etree.c etree.h
recursive-include src/lxml/tests *.rng *.xslt *.xml
-recursive-include doc *.txt *.html *.xml *.mgp
+recursive-include doc *.txt *.html *.xml *.mgp pubkey.asc
exclude doc/pyrex.txt
Added: lxml/trunk/doc/pubkey.asc
==============================================================================
--- (empty file)
+++ lxml/trunk/doc/pubkey.asc Thu May 18 10:37:19 2006
@@ -0,0 +1,36 @@
+-----BEGIN PGP PUBLIC KEY BLOCK-----
+Version: GnuPG v1.4.2 (GNU/Linux)
+
+mQGiBEQf3JQRBACciSqxoX0q3VurkRENVVtG/pVqtFh/d2CohbVJlLCrO4s7nnPj
+CTfZFt6tmykZjsLJl24XpEJt0O/C0jLcaBqvXVgVvRXHz4DjEYYuQF4LPthhI4MA
+4T7ExptX4lU5g3BVJ46vPU8uRBbbxarBRas9rYewgnrYKWpZZCa7yMq+9wCgnyyR
+Si4E3viLwi77jda135nA6vcD/iqu8zIl9/dFuUcOvxJrhrm+UdY72puZ1TVczSAH
+GOqMjrKkfyHlaJh/ZzWENpTZIfOdVhy7Chvva18vH4Wz7jKj5UeIpRrBvjAD28r3
+Y3W5bfsnpPkvDOyU1vqBsw4q+/250GXEX0JqV2Rbf5yLVgEZPdGrswO460dr4UVS
+8RS0BACYTmyrz57AugHc5tRkqNw6o7ux2deOT0c3AbUcOWtOocGumCsUf+M1nOrc
+VWkeBWTv4HIIiecWYY/KwIemTthQGjxywaZDxOlBT0BOL/+vfYTq/plZULXr+g90
+rSe82+kLl9N5onkBDJKeDIcJDzRoxIRPV1i0Om/5JBI4jmUnv7QnU3RlZmFuIEJl
+aG5lbCA8c2NvZGVyQHVzZXJzLmJlcmxpb3MuZGU+iF8EExECACAFAkQiqKYCGwMG
+CwkIBwMCBBUCCAMEFgIDAQIeAQIXgAAKCRANPVNpCNOgHi+2AJ0a0JH8iP3RqrOL
+JefvHz1dSl3MxACYo7Ma6CeIgsGnyaSSdNOmNVXn+IhGBBARAgAGBQJEIqk0AAoJ
+ELO5mMzzmgZbmCcAoKZ2En1IlsxBpaPPxgWYrUOWfc6hAKCBWODMMOYptCBkSrjg
+m3gsrjHgYbQsU3RlZmFuIEJlaG5lbCA8c2NvZGVyQHVzZXJzLnNvdXJjZWZvcmdl
+Lm5ldD6IYAQTEQIAIAUCRB/clAIbAwYLCQgHAwIEFQIIAwQWAgMBAh4BAheAAAoJ
+EA09U2kI06Aen2YAn0hvuDs+Gslq9vPRFFbsFNJI40PmAJ0chjiiEy0xV5C+n6YX
+XFuldRDILYhGBBARAgAGBQJEIp4AAAoJELO5mMzzmgZbgKQAn3pWrmFdj8YaEyuR
+tEjKVZJDQ6ZVAJ0Y1igwADT40BPra+G/xiLa3YbCrrkCDQREH9ynEAgAiR4/0r0d
+doViNECfSLClllu5K0Bo1SEiMtvVNC3sJYgVzBddD8Xn8UAdjyAgmaL5FC2FsNQu
+RxxKkNlHNYCq8ZSWtZaL2MQ+SyMUyHv6VXVCGuSW0COpzbx58u+SZpjyESJ1kaZc
+73SaIw6kv/dVQHjurwmlo1lg3dLZ3PG08WGCYUMqkkv2K+J7+puzE2Cjo31gTq4s
+LYDCV26wjVQ6BqT2EcHQhVEjh0xq5ugc908cr/2FQAKkTifEbF+OVBGWiFMGgri+
+6+G54/BV/RakpvNCFYBiZHn/M9mQaWt7XoTmnEQ1ldq5KNlRhkqnQRF/NK5VpGcQ
+29As28aqpZTECwADBgf/WlRvBRI1Q1eIv2falEv7C6sOxqc3kr5z1uUBTRG5v9t6
+ff9k/J4oC6cnQx00GK3ZR8ija6bl8zwu+0m0M3rW49Krb1rsiT7r4ahOZ7p9RRro
+oG3NbUJYgMG10D1nxpaioYqa/m+PpILJM0wfYZZEuX0xkZcOB24yb+J7EIcGR09T
+mMd5sXtdTU+w/p7Xi2cP61uQ8qixyHBH8E06qgW2JtVFV9rGn7CNUOvkNaUBRnY5
+QxhdkvKJRx7voOLYWZFUBIWgto+6vmTgKmc2Ho6qddzME9UgwUNcknRgm0cf6Cxr
+6zPtxZl8a6KemjQcK7kARSmMNCDkqp/Pohe519A5vYhJBBgRAgAJBQJEH9ynAhsM
+AAoJEA09U2kI06Aesv4AnjiVQVLzqnNS/64vvMMP1UARY3HtAJ90YxNGhRNIhWYL
+UU16oJlGD/9M1Q==
+=gWy2
+-----END PGP PUBLIC KEY BLOCK-----
From scoder at codespeak.net Thu May 18 10:38:08 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Thu, 18 May 2006 10:38:08 +0200 (CEST)
Subject: [Lxml-checkins] r27389 - lxml/trunk/doc
Message-ID: <20060518083808.6F8551006B@code0.codespeak.net>
Author: scoder
Date: Thu May 18 10:38:07 2006
New Revision: 27389
Modified:
lxml/trunk/doc/build.txt
Log:
added section by Andreas Pakulat on how to build Debian packages from SVN sources
Modified: lxml/trunk/doc/build.txt
==============================================================================
--- lxml/trunk/doc/build.txt (original)
+++ lxml/trunk/doc/build.txt Thu May 18 10:38:07 2006
@@ -169,3 +169,26 @@
python setup.py bdist_wininst
This will create a windows installer in the ``pkg`` directory.
+
+
+Building Debian packages from Subversion sources
+------------------------------------------------
+
+`Andreas Pakulat`_ proposed the following approach.
+
+.. _`Andreas Pakulat`: http://codespeak.net/pipermail/lxml-dev/2006-May/001254.html
+
+* ``apt-get source lxml``
+* remove the unpacked directory
+* tar.gz the trunk version and replace the orig.tar.gz that lies in the
+ directory
+* do ``dpkg -x lxml-...dsc`` and cd into the newly created directory
+* run ``dch -i`` and add a comment like "use trunk version", this will
+ increase the debian version number so apt/dpkg don't get confused
+* run ``dpkg-buildpackage -rfakeroot -us -uc`` to build the package
+
+Eventually dpkg-buildpackage will tell you that some dependencies are missing,
+you can either install them manually or run ``apt-get build-dep lxml``
+
+That will give you .deb packages in the parent directory which can be
+installed using ``dpkg -i``.
From scoder at codespeak.net Thu May 18 10:41:04 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Thu, 18 May 2006 10:41:04 +0200 (CEST)
Subject: [Lxml-checkins] r27390 - lxml/trunk/doc
Message-ID: <20060518084104.DF4D81006B@code0.codespeak.net>
Author: scoder
Date: Thu May 18 10:41:03 2006
New Revision: 27390
Modified:
lxml/trunk/doc/build.txt
Log:
doc cleanup
Modified: lxml/trunk/doc/build.txt
==============================================================================
--- lxml/trunk/doc/build.txt (original)
+++ lxml/trunk/doc/build.txt Thu May 18 10:41:03 2006
@@ -171,8 +171,8 @@
This will create a windows installer in the ``pkg`` directory.
-Building Debian packages from Subversion sources
-------------------------------------------------
+Building Debian packages from SVN sources
+-----------------------------------------
`Andreas Pakulat`_ proposed the following approach.
@@ -180,7 +180,7 @@
* ``apt-get source lxml``
* remove the unpacked directory
-* tar.gz the trunk version and replace the orig.tar.gz that lies in the
+* tar.gz the lxml SVN version and replace the orig.tar.gz that lies in the
directory
* do ``dpkg -x lxml-...dsc`` and cd into the newly created directory
* run ``dch -i`` and add a comment like "use trunk version", this will
From scoder at codespeak.net Thu May 18 11:00:04 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Thu, 18 May 2006 11:00:04 +0200 (CEST)
Subject: [Lxml-checkins] r27391 - lxml/trunk/src/lxml
Message-ID: <20060518090004.D5C3F1006D@code0.codespeak.net>
Author: scoder
Date: Thu May 18 11:00:03 2006
New Revision: 27391
Modified:
lxml/trunk/src/lxml/etree.pyx
Log:
allow 'alpha' and 'beta' in version strings, represent as -2 and -1 in version tuple
Modified: lxml/trunk/src/lxml/etree.pyx
==============================================================================
--- lxml/trunk/src/lxml/etree.pyx (original)
+++ lxml/trunk/src/lxml/etree.pyx Thu May 18 11:00:03 2006
@@ -64,6 +64,7 @@
try:
version_list.append(int(item))
except ValueError:
+ item = {'alpha':-2, 'beta':-1}.get(item.lower(), item)
version_list.append(item)
return tuple(version_list)
From scoder at codespeak.net Thu May 18 11:03:51 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Thu, 18 May 2006 11:03:51 +0200 (CEST)
Subject: [Lxml-checkins] r27392 - lxml/trunk/src/lxml
Message-ID: <20060518090351.9479D10061@code0.codespeak.net>
Author: scoder
Date: Thu May 18 11:03:48 2006
New Revision: 27392
Modified:
lxml/trunk/src/lxml/etree.pyx
Log:
cleanup: make special casing more explicit
Modified: lxml/trunk/src/lxml/etree.pyx
==============================================================================
--- lxml/trunk/src/lxml/etree.pyx (original)
+++ lxml/trunk/src/lxml/etree.pyx Thu May 18 11:03:48 2006
@@ -64,7 +64,10 @@
try:
version_list.append(int(item))
except ValueError:
- item = {'alpha':-2, 'beta':-1}.get(item.lower(), item)
+ if item == 'alpha':
+ item = -2
+ elif item == 'beta':
+ item = -1
version_list.append(item)
return tuple(version_list)
From scoder at codespeak.net Thu May 18 11:10:37 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Thu, 18 May 2006 11:10:37 +0200 (CEST)
Subject: [Lxml-checkins] r27393 - lxml/trunk
Message-ID: <20060518091037.9FF5610061@code0.codespeak.net>
Author: scoder
Date: Thu May 18 11:10:36 2006
New Revision: 27393
Modified:
lxml/trunk/setup.py
Log:
fix commit 26466: clean up appending '-lexslt' to xslt_libs
Modified: lxml/trunk/setup.py
==============================================================================
--- lxml/trunk/setup.py (original)
+++ lxml/trunk/setup.py Thu May 18 11:10:36 2006
@@ -68,8 +68,7 @@
# compile also against libexslt!
xslt_libs = flags('xslt-config --libs')
-xslt_libs.append('-lexslt')
-for i, libname in (): # enumerate(xslt_libs):
+for i, libname in enumerate(xslt_libs):
if 'exslt' in libname:
break
if 'xslt' in libname:
From scoder at codespeak.net Thu May 18 11:48:31 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Thu, 18 May 2006 11:48:31 +0200 (CEST)
Subject: [Lxml-checkins] r27395 - in lxml/trunk: . doc
Message-ID: <20060518094831.A5E8310061@code0.codespeak.net>
Author: scoder
Date: Thu May 18 11:48:30 2006
New Revision: 27395
Modified:
lxml/trunk/doc/build.txt
lxml/trunk/setup.py
Log:
simplify static compilation by providing a place to fill in the library names
Modified: lxml/trunk/doc/build.txt
==============================================================================
--- lxml/trunk/doc/build.txt (original)
+++ lxml/trunk/doc/build.txt Thu May 18 11:48:30 2006
@@ -100,8 +100,8 @@
.. _`David Sankel`: http://codespeak.net/pipermail/lxml-dev/2006-May/001196.html
Download lxml and all required libraries to the same directory. The iconv,
-libxml2, libxslt, and zlib libraries are all available from xmlsoft.org. The
-place to go on the ftp site is ftp://xmlsoft.org/libxml2/win32.
+libxml2, libxslt, and zlib libraries are all available from the ftp site
+ftp://ftp.zlatkovic.com/pub/libxml/.
Your directory should now have the following files in it (although possibly
different versions)::
@@ -126,47 +126,47 @@
zlib-1.2.3.win32/
zlib-1.2.3.win32.zip
-Go to the lxml-0.9.2 directory and edit the file ``setup.py``. There should be
-a section that looks like this::
+Go to the lxml-0.9.2 directory and edit the file ``setup.py``. There should
+be a section near the top that looks like this::
- ext_modules = [ Extension(
- "lxml.etree",
- sources = sources,
- extra_compile_args = ['-w'] + flags('xslt-config --cflags'),
- extra_link_args = flags('xslt-config --libs')
- )],
-
-The problem here is that the Windows version of libxslt does not install the
-little program ``xslt-config``, which would normally auto-configure the build
-process.
+ def setupStaticBuild():
+ cflags = [
+ ]
+ xslt_libs = [
+ ]
+ result = (cflags, xslt_libs)
+ # return result
+ raise NotImplementedError, \
+ "Static build not configured, see doc/build.txt"
Change this section to something like this, but take care to use the correct
version numbers::
- ext_modules = [ Extension(
- "lxml.etree",
- sources = sources,
- extra_compile_args = ['-w'] + [
+ def setupStaticBuild():
+ cflags = [
"-I..\\libxml2-2.6.23.win32\\include ",
"-I..\\libxslt-1.1.15.win32\\include",
"-I..\\zlib-1.2.3.win32\\include",
"-I..\\iconv-1.9.1.win32\\include"
- ],
- extra_link_args = [
+ ]
+ xslt_libs = [
"..\\libxml2-2.6.23.win32\\lib\\libxml2_a.lib",
"..\\libxslt-1.1.15.win32\\lib\\libxslt_a.lib",
+ "..\\libxslt-1.1.15.win32\\lib\\libexslt_a.lib",
"..\\zlib-1.2.3.win32\\lib\\zlib.lib",
- "..\\iconv- 1.9.1.win32\\lib\\iconv_a.lib"
+ "..\\iconv-1.9.1.win32\\lib\\iconv_a.lib"
]
- )],
+ result = (cflags, xslt_libs)
+ return result
The ``_a`` part of the library names means that we are linking statically
against the named library files. If you want to use DLLs, you need to link
against the DLL version of the libraries.
-Now you should be able to use setup.py and everything should work well. Try calling::
+Now you should be able to use setup.py and everything should work well. Try
+calling::
- python setup.py bdist_wininst
+ python setup.py bdist_wininst --static
This will create a windows installer in the ``pkg`` directory.
Modified: lxml/trunk/setup.py
==============================================================================
--- lxml/trunk/setup.py (original)
+++ lxml/trunk/setup.py Thu May 18 11:48:30 2006
@@ -1,9 +1,32 @@
import sys, os, os.path, re
+setup_args = {}
+try:
+ from setuptools import setup
+ from setuptools.extension import Extension
+ # prevent setuptools from making local etree.so copies:
+ setup_args['zip_safe'] = False
+except ImportError:
+ from distutils.core import setup
+ from distutils.extension import Extension
+
+# This is called if the '--static' option is passed
+def setupStaticBuild():
+ "See doc/build.txt to make this work."
+ cflags = [
+ ]
+ xslt_libs = [
+ ]
+ result = (cflags, xslt_libs)
+ # return result
+ raise NotImplementedError, \
+ "Static build not configured, see doc/build.txt"
+
def flags(cmd):
wf, rf, ef = os.popen3(cmd)
return rf.read().strip().split(' ')
+
src_dir = os.path.join(os.getcwd(), os.path.dirname(sys.argv[0]))
version = open(os.path.join(src_dir, 'version.txt')).read().strip()
@@ -26,17 +49,7 @@
print "Building lxml version", svn_version
-setup_args = {}
-changelog_text = ""
-
-try:
- from setuptools import setup
- from setuptools.extension import Extension
- # prevent setuptools from making local etree.so copies:
- setup_args['zip_safe'] = False
-except ImportError:
- from distutils.core import setup
- from distutils.extension import Extension
+# setup etree extension building
try:
from Pyrex.Distutils import build_ext as build_pyx
@@ -47,6 +60,35 @@
sources = ["src/lxml/etree.c"]
try:
+ sys.argv.remove('--static')
+except ValueError:
+ # we are not compiling statically
+ cflags = flags('xslt-config --cflags')
+ xslt_libs = flags('xslt-config --libs')
+
+ # compile also against libexslt!
+ for i, libname in enumerate(xslt_libs):
+ if 'exslt' in libname:
+ break
+ if 'xslt' in libname:
+ xslt_libs.insert(i, libname.replace('xslt', 'exslt'))
+ break
+else:
+ # use the static setup as configured in setupStaticBuild
+ cflags, xslt_libs = setupStaticBuild()
+
+ext_modules = [ Extension(
+ "lxml.etree",
+ sources = sources,
+ extra_compile_args = ['-w'] + cflags,
+ extra_link_args = xslt_libs
+ )]
+
+
+# setup ChangeLog entry
+
+changelog_text = ""
+try:
changelog = open(os.path.join(src_dir, "CHANGES.txt"), 'r')
except:
print "*NOTE*: couldn't open CHANGES.txt !"
@@ -66,14 +108,6 @@
changelog.close()
-# compile also against libexslt!
-xslt_libs = flags('xslt-config --libs')
-for i, libname in enumerate(xslt_libs):
- if 'exslt' in libname:
- break
- if 'xslt' in libname:
- xslt_libs.insert(i, libname.replace('xslt', 'exslt'))
- break
setup(
name = "lxml",
@@ -109,11 +143,6 @@
package_dir = {'': 'src'},
packages = ['lxml'],
- ext_modules = [ Extension(
- "lxml.etree",
- sources = sources,
- extra_compile_args = ['-w'] + flags('xslt-config --cflags'),
- extra_link_args = xslt_libs
- )],
+ ext_modules = ext_modules,
**setup_args
)
From scoder at codespeak.net Thu May 18 11:54:27 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Thu, 18 May 2006 11:54:27 +0200 (CEST)
Subject: [Lxml-checkins] r27396 - lxml/trunk
Message-ID: <20060518095427.4362A10061@code0.codespeak.net>
Author: scoder
Date: Thu May 18 11:54:26 2006
New Revision: 27396
Modified:
lxml/trunk/version.txt
Log:
set version to 1.0.beta
Modified: lxml/trunk/version.txt
==============================================================================
--- lxml/trunk/version.txt (original)
+++ lxml/trunk/version.txt Thu May 18 11:54:26 2006
@@ -1 +1 @@
-0.9.2
+1.0.beta
From scoder at codespeak.net Thu May 18 11:55:05 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Thu, 18 May 2006 11:55:05 +0200 (CEST)
Subject: [Lxml-checkins] r27397 - lxml/trunk/src/lxml
Message-ID: <20060518095505.C517810061@code0.codespeak.net>
Author: scoder
Date: Thu May 18 11:55:04 2006
New Revision: 27397
Modified:
lxml/trunk/src/lxml/etree.pyx
Log:
small cleanup in version extraction code
Modified: lxml/trunk/src/lxml/etree.pyx
==============================================================================
--- lxml/trunk/src/lxml/etree.pyx (original)
+++ lxml/trunk/src/lxml/etree.pyx Thu May 18 11:55:04 2006
@@ -62,13 +62,13 @@
l = (version.replace('-', '.').split('.') + [0]*4)[:4]
for item in l:
try:
- version_list.append(int(item))
+ item = int(item)
except ValueError:
if item == 'alpha':
item = -2
elif item == 'beta':
item = -1
- version_list.append(item)
+ version_list.append(item)
return tuple(version_list)
cdef __unpackIntVersion(int c_version):
From scoder at codespeak.net Thu May 18 11:57:38 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Thu, 18 May 2006 11:57:38 +0200 (CEST)
Subject: [Lxml-checkins] r27398 - lxml/trunk/src/lxml/tests
Message-ID: <20060518095738.3494710061@code0.codespeak.net>
Author: scoder
Date: Thu May 18 11:57:37 2006
New Revision: 27398
Modified:
lxml/trunk/src/lxml/tests/test_etree.py
Log:
test case for etree.__version__ string and LXML_VERSION tuple
Modified: lxml/trunk/src/lxml/tests/test_etree.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_etree.py (original)
+++ lxml/trunk/src/lxml/tests/test_etree.py Thu May 18 11:57:37 2006
@@ -16,7 +16,18 @@
class ETreeOnlyTestCase(HelperTestCase):
"""Tests only for etree, not ElementTree"""
etree = etree
-
+
+ def test_version(self):
+ self.assert_(isinstance(etree.__version__, str))
+ self.assert_(isinstance(etree.LXML_VERSION, tuple))
+ self.assertEqual(len(etree.LXML_VERSION), 4)
+ self.assert_(isinstance(etree.LXML_VERSION[0], int))
+ self.assert_(isinstance(etree.LXML_VERSION[1], int))
+ self.assert_(isinstance(etree.LXML_VERSION[2], int))
+ self.assert_(isinstance(etree.LXML_VERSION[3], int))
+ self.assert_(etree.__version__.startswith(
+ str(etree.LXML_VERSION[0])))
+
def test_parse_error(self):
parse = self.etree.parse
# from StringIO
From scoder at codespeak.net Thu May 18 12:27:47 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Thu, 18 May 2006 12:27:47 +0200 (CEST)
Subject: [Lxml-checkins] r27399 - lxml/trunk/doc
Message-ID: <20060518102747.77A421006B@code0.codespeak.net>
Author: scoder
Date: Thu May 18 12:27:45 2006
New Revision: 27399
Modified:
lxml/trunk/doc/build.txt
Log:
clarifications in doc/build.txt
Modified: lxml/trunk/doc/build.txt
==============================================================================
--- lxml/trunk/doc/build.txt (original)
+++ lxml/trunk/doc/build.txt Thu May 18 12:27:45 2006
@@ -92,10 +92,11 @@
-------------------------
Most operating systems have proper package management that makes installing
-current versions of libxml2 and libxslt easy. However, Microsoft Windows
-lacks these capabilities. It can therefore be interesting to statically link
-the external libraries into lxml.etree to avoid having to install them
-separately. `David Sankel`_ proposed the following approach.
+current versions of libxml2 and libxslt easy. The most famous exception is
+Microsoft Windows, which entirely lacks these capabilities. It can therefore
+be interesting to statically link the external libraries into lxml.etree to
+avoid having to install them separately. `David Sankel`_ proposed the
+following approach.
.. _`David Sankel`: http://codespeak.net/pipermail/lxml-dev/2006-May/001196.html
@@ -160,11 +161,11 @@
return result
The ``_a`` part of the library names means that we are linking statically
-against the named library files. If you want to use DLLs, you need to link
-against the DLL version of the libraries.
+against the named library files. If you want to use dynamic libraries, you
+need to link against the DLL version of the libraries.
-Now you should be able to use setup.py and everything should work well. Try
-calling::
+Now you should be able to pass the ``--static`` option to setup.py and
+everything should work well. Try calling::
python setup.py bdist_wininst --static
From scoder at codespeak.net Thu May 18 13:01:58 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Thu, 18 May 2006 13:01:58 +0200 (CEST)
Subject: [Lxml-checkins] r27402 - lxml/trunk
Message-ID: <20060518110158.34E581006B@code0.codespeak.net>
Author: scoder
Date: Thu May 18 13:01:57 2006
New Revision: 27402
Modified:
lxml/trunk/MANIFEST.in
Log:
forgot to include .css file for generated HTML pages
Modified: lxml/trunk/MANIFEST.in
==============================================================================
--- lxml/trunk/MANIFEST.in (original)
+++ lxml/trunk/MANIFEST.in Thu May 18 13:01:57 2006
@@ -1,5 +1,5 @@
include setup.py MANIFEST.in *.txt
recursive-include src *.pyx *.pxd *.pxi *.py etree.c etree.h
recursive-include src/lxml/tests *.rng *.xslt *.xml
-recursive-include doc *.txt *.html *.xml *.mgp pubkey.asc
+recursive-include doc *.txt *.html *.css *.xml *.mgp pubkey.asc
exclude doc/pyrex.txt
From scoder at codespeak.net Thu May 18 13:02:30 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Thu, 18 May 2006 13:02:30 +0200 (CEST)
Subject: [Lxml-checkins] r27403 - lxml/trunk/doc
Message-ID: <20060518110230.0DDCB1006B@code0.codespeak.net>
Author: scoder
Date: Thu May 18 13:02:29 2006
New Revision: 27403
Modified:
lxml/trunk/doc/build.txt
Log:
refer to version 1.0.0 in docs
Modified: lxml/trunk/doc/build.txt
==============================================================================
--- lxml/trunk/doc/build.txt (original)
+++ lxml/trunk/doc/build.txt Thu May 18 13:02:29 2006
@@ -122,12 +122,12 @@
libxml2-2.6.23.win32.zip
libxslt-1.1.15.win32/
libxslt-1.1.15.win32.zip
- lxml-0.9.2/
- lxml-0.9.2.tgz
+ lxml-1.0.0/
+ lxml-1.0.0.tgz
zlib-1.2.3.win32/
zlib-1.2.3.win32.zip
-Go to the lxml-0.9.2 directory and edit the file ``setup.py``. There should
+Go to the lxml-1.0.0 directory and edit the file ``setup.py``. There should
be a section near the top that looks like this::
def setupStaticBuild():
From scoder at codespeak.net Thu May 18 13:02:54 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Thu, 18 May 2006 13:02:54 +0200 (CEST)
Subject: [Lxml-checkins] r27404 - lxml/trunk/doc
Message-ID: <20060518110254.40A9E1006B@code0.codespeak.net>
Author: scoder
Date: Thu May 18 13:02:53 2006
New Revision: 27404
Modified:
lxml/trunk/doc/build.txt
Log:
refer to version 1.0.0 in docs
Modified: lxml/trunk/doc/build.txt
==============================================================================
--- lxml/trunk/doc/build.txt (original)
+++ lxml/trunk/doc/build.txt Thu May 18 13:02:53 2006
@@ -110,7 +110,7 @@
iconv-1.9.1.win32.zip
libxml2-2.6.23.win32.zip
libxslt-1.1.15.win32.zip
- lxml-0.9.2.tgz
+ lxml-1.0.0.tgz
zlib-1.2.3.win32.zip
Now extract each of those files in the *same* directory. This should give you
From scoder at codespeak.net Thu May 18 13:13:27 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Thu, 18 May 2006 13:13:27 +0200 (CEST)
Subject: [Lxml-checkins] r27406 - lxml/trunk/doc
Message-ID: <20060518111327.AB0FC1006B@code0.codespeak.net>
Author: scoder
Date: Thu May 18 13:13:26 2006
New Revision: 27406
Modified:
lxml/trunk/doc/api.txt
Log:
clarification on tounicode() vs. tostring()
Modified: lxml/trunk/doc/api.txt
==============================================================================
--- lxml/trunk/doc/api.txt (original)
+++ lxml/trunk/doc/api.txt Thu May 18 13:13:26 2006
@@ -172,12 +172,14 @@
>>> etree.tounicode(et)
u' '
-Note that the unicode strings returned by ``tounicode()`` never have an XML
-declaration and therefore do not specify an encoding. This makes it possible
-to pass them back into the lxml parsers. However, you may have to add a
-declaration yourself if you want to serialize such a unicode string to a byte
-stream later. In contrast, the ``tostring()`` function automatically adds a
-declaration as needed that reflects the encoding of the returned byte string.
+If you want to save the result to a file or pass it over the network, you
+should use ``write()`` or ``tostring()`` with an encoding argument (typically
+UTF-8) to serialize the XML. The main reason is that unicode strings returned
+by ``tounicode()`` never have an XML declaration and therefore do not specify
+an encoding. In contrast, the ``tostring()`` function automatically adds a
+declaration as needed that reflects the encoding of the returned string. This
+makes it possible for other parsers to correctly parse the XML byte stream.
+Note that using ``tostring()`` with UTF-8 is also typically faster.
xpath method on ElementTree, Element
From scoder at codespeak.net Thu May 18 13:27:40 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Thu, 18 May 2006 13:27:40 +0200 (CEST)
Subject: [Lxml-checkins] r27408 - lxml/trunk
Message-ID: <20060518112740.089CF1006B@code0.codespeak.net>
Author: scoder
Date: Thu May 18 13:27:38 2006
New Revision: 27408
Modified:
lxml/trunk/CHANGES.txt
Log:
set current version in CHANGES.txt to 1.0.beta
Modified: lxml/trunk/CHANGES.txt
==============================================================================
--- lxml/trunk/CHANGES.txt (original)
+++ lxml/trunk/CHANGES.txt Thu May 18 13:27:38 2006
@@ -1,8 +1,8 @@
lxml changelog
==============
-current
-=======
+1.0.beta (2006-05-18)
+=====================
Features added
--------------
From scoder at codespeak.net Thu May 18 13:30:52 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Thu, 18 May 2006 13:30:52 +0200 (CEST)
Subject: [Lxml-checkins] r27409 - lxml/tag/lxml-1.0.beta
Message-ID: <20060518113052.E267E10061@code0.codespeak.net>
Author: scoder
Date: Thu May 18 13:30:51 2006
New Revision: 27409
Added:
lxml/tag/lxml-1.0.beta/
- copied from r27408, lxml/trunk/
Log:
tag for 1.0.beta
From scoder at codespeak.net Thu May 18 13:45:11 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Thu, 18 May 2006 13:45:11 +0200 (CEST)
Subject: [Lxml-checkins] r27410 - lxml/trunk
Message-ID: <20060518114511.B312F1006B@code0.codespeak.net>
Author: scoder
Date: Thu May 18 13:45:10 2006
New Revision: 27410
Modified:
lxml/trunk/MANIFEST.in
Log:
explicitly name .txt files in root directory
Modified: lxml/trunk/MANIFEST.in
==============================================================================
--- lxml/trunk/MANIFEST.in (original)
+++ lxml/trunk/MANIFEST.in Thu May 18 13:45:10 2006
@@ -1,4 +1,5 @@
-include setup.py MANIFEST.in *.txt
+include setup.py MANIFEST.in version.txt
+include CHANGES.txt CREDITS.txt INSTALL.txt LICENSES.txt README.txt TODO.txt
recursive-include src *.pyx *.pxd *.pxi *.py etree.c etree.h
recursive-include src/lxml/tests *.rng *.xslt *.xml
recursive-include doc *.txt *.html *.css *.xml *.mgp pubkey.asc
From scoder at codespeak.net Thu May 18 13:53:49 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Thu, 18 May 2006 13:53:49 +0200 (CEST)
Subject: [Lxml-checkins] r27411 - lxml/trunk
Message-ID: <20060518115349.52B581006B@code0.codespeak.net>
Author: scoder
Date: Thu May 18 13:53:48 2006
New Revision: 27411
Modified:
lxml/trunk/setup.py
Log:
automate setting trove devel status (alpha/beta/stable) from version string
Modified: lxml/trunk/setup.py
==============================================================================
--- lxml/trunk/setup.py (original)
+++ lxml/trunk/setup.py Thu May 18 13:53:48 2006
@@ -47,6 +47,13 @@
''' % svn_version)
version_h.close()
+if 'alpha' in version:
+ dev_status = 'Development Status :: 3 - Alpha'
+elif 'beta' in version:
+ dev_status = 'Development Status :: 4 - Beta'
+else:
+ dev_status = 'Development Status :: 5 - Production/Stable'
+
print "Building lxml version", svn_version
# setup etree extension building
@@ -130,7 +137,7 @@
""" + changelog_text,
classifiers = [
- 'Development Status :: 5 - Production/Stable',
+ dev_status,
'Intended Audience :: Developers',
'Intended Audience :: Information Technology',
'License :: OSI Approved :: BSD License',
From scoder at codespeak.net Thu May 18 15:42:55 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Thu, 18 May 2006 15:42:55 +0200 (CEST)
Subject: [Lxml-checkins] r27413 - lxml/trunk
Message-ID: <20060518134255.9030A1006B@code0.codespeak.net>
Author: scoder
Date: Thu May 18 15:42:54 2006
New Revision: 27413
Modified:
lxml/trunk/bench.py
Log:
fix bench.py xslt_extensions_old and xpath_extensions_old, used a non-public API that's no longer available
Modified: lxml/trunk/bench.py
==============================================================================
--- lxml/trunk/bench.py (original)
+++ lxml/trunk/bench.py Thu May 18 15:42:54 2006
@@ -449,7 +449,7 @@
return element[0]
else:
return ()
- extensions = {None : {'child' : return_child}}
+ extensions = {(None, 'child') : return_child}
xpath = self.etree.XPath("child(.)", extensions=extensions)
for child in root:
xpath(child)
@@ -474,7 +474,7 @@
def return_child(_, elements):
return elements[0][0]
- extensions = {'testns' : {'child' : return_child}}
+ extensions = {('testns', 'child') : return_child}
transform = self.etree.XSLT(tree, extensions)
for i in range(10):
From scoder at codespeak.net Thu May 18 16:19:08 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Thu, 18 May 2006 16:19:08 +0200 (CEST)
Subject: [Lxml-checkins] r27414 - in lxml/trunk: . src/lxml
Message-ID: <20060518141908.D4E741006B@code0.codespeak.net>
Author: scoder
Date: Thu May 18 16:19:07 2006
New Revision: 27414
Modified:
lxml/trunk/CHANGES.txt
lxml/trunk/src/lxml/apihelpers.pxi
lxml/trunk/src/lxml/etree.pyx
Log:
cleanup changeDocumentBelow, rename it to moveNodeToDocument to reflect its use
Modified: lxml/trunk/CHANGES.txt
==============================================================================
--- lxml/trunk/CHANGES.txt (original)
+++ lxml/trunk/CHANGES.txt Thu May 18 16:19:07 2006
@@ -1,6 +1,16 @@
lxml changelog
==============
+current
+=======
+
+Features added
+--------------
+
+Bugs fixed
+----------
+
+
1.0.beta (2006-05-18)
=====================
Modified: lxml/trunk/src/lxml/apihelpers.pxi
==============================================================================
--- lxml/trunk/src/lxml/apihelpers.pxi (original)
+++ lxml/trunk/src/lxml/apihelpers.pxi Thu May 18 16:19:07 2006
@@ -388,7 +388,7 @@
return source.geturl()
return None
-cdef void changeDocumentBelow(_NodeBase node, _Document doc, int recursive):
+cdef void moveNodeToDocument(_NodeBase node, _Document doc):
"""For a node and all nodes below, change document.
A node can change document in certain operations as an XML
@@ -396,11 +396,11 @@
tree below (including the current node). It also reconciliates
namespaces so they're correct inside the new environment.
"""
- if recursive:
- changeDocumentBelowHelper(node._c_node, doc)
+ if node._doc is not doc:
+ changeDocumentBelow(node._c_node, doc)
tree.xmlReconciliateNs(doc._c_doc, node._c_node)
-
-cdef void changeDocumentBelowHelper(xmlNode* c_node, _Document doc):
+
+cdef void changeDocumentBelow(xmlNode* c_node, _Document doc):
cdef ProxyRef* ref
cdef xmlNode* c_current
cdef xmlAttr* c_attr_current
@@ -410,22 +410,23 @@
return
# different _c_doc
c_node.doc = doc._c_doc
-
- if c_node._private is not NULL:
- ref = c_node._private
- while ref is not NULL:
- proxy = <_NodeBase>ref.proxy
- proxy._doc = doc
- ref = ref.next
# adjust all children
c_current = c_node.children
while c_current is not NULL:
- changeDocumentBelowHelper(c_current, doc)
+ changeDocumentBelow(c_current, doc)
c_current = c_current.next
# adjust all attributes
c_attr_current = c_node.properties
while c_attr_current is not NULL:
- changeDocumentBelowHelper(c_current, doc)
+ changeDocumentBelow(c_current, doc)
c_attr_current = c_attr_current.next
+
+ # adjust Python references last
+ if c_node._private is not NULL:
+ ref = c_node._private
+ while ref is not NULL:
+ proxy = <_NodeBase>ref.proxy
+ proxy._doc = doc
+ ref = ref.next
Modified: lxml/trunk/src/lxml/etree.pyx
==============================================================================
--- lxml/trunk/src/lxml/etree.pyx (original)
+++ lxml/trunk/src/lxml/etree.pyx Thu May 18 16:19:07 2006
@@ -524,16 +524,14 @@
def __setitem__(self, Py_ssize_t index, _NodeBase element):
cdef xmlNode* c_node
cdef xmlNode* c_next
- cdef int foreign
c_node = _findChild(self._c_node, index)
if c_node is NULL:
raise IndexError
- foreign = self._doc is not element._doc
c_next = element._c_node.next
_removeText(c_node.next)
tree.xmlReplaceNode(c_node, element._c_node)
_moveTail(c_next, element._c_node)
- changeDocumentBelow(element, self._doc, foreign)
+ moveNodeToDocument(element, self._doc)
def __delitem__(self, Py_ssize_t index):
cdef xmlNode* c_node
@@ -552,7 +550,6 @@
cdef xmlNode* c_node
cdef xmlNode* c_next
cdef _Element mynode
- cdef int foreign
# first, find start of slice
c_node = _findChild(self._c_node, start)
# now delete the slice
@@ -568,7 +565,6 @@
for mynode in value:
if mynode is None:
raise TypeError, "Node must not be None."
- foreign = self._doc is not mynode._doc
# store possible text tail
c_next = mynode._c_node.next
# now move node previous to insertion point
@@ -577,7 +573,7 @@
# and move tail just behind his node
_moveTail(c_next, mynode._c_node)
# move it into a new document
- changeDocumentBelow(mynode, self._doc, foreign)
+ moveNodeToDocument(mynode, self._doc)
def __deepcopy__(self, memo):
return self.__copy__()
@@ -600,8 +596,6 @@
def append(self, _Element element not None):
cdef xmlNode* c_next
cdef xmlNode* c_node
- cdef int foreign
- foreign = self._doc is not element._doc
c_node = element._c_node
# store possible text node
c_next = c_node.next
@@ -612,7 +606,7 @@
_moveTail(c_next, c_node)
# uh oh, elements may be pointing to different doc when
# parent element has moved; change them too..
- changeDocumentBelow(element, self._doc, foreign)
+ moveNodeToDocument(element, self._doc)
def clear(self):
cdef xmlAttr* c_attr
@@ -642,16 +636,14 @@
def insert(self, index, _Element element not None):
cdef xmlNode* c_node
cdef xmlNode* c_next
- cdef int foreign
c_node = _findChild(self._c_node, index)
if c_node is NULL:
self.append(element)
return
- foreign = self._doc is not element._doc
c_next = element._c_node.next
tree.xmlAddPrevSibling(c_node, element._c_node)
_moveTail(c_next, element._c_node)
- changeDocumentBelow(element, self._doc, foreign)
+ moveNodeToDocument(element, self._doc)
def remove(self, _Element element not None):
cdef xmlNode* c_node
@@ -1381,16 +1373,7 @@
c_doc = _newDoc()
doc = _documentFactory(c_doc, parser)
- etree = _elementTreeFactory(doc, element)
-
-## # XXX what if element and file are both not None?
-## if element is not None:
-## c_next = element._c_node.next
-## tree.xmlDocSetRootElement(etree._c_doc, element._c_node)
-## _moveTail(c_next, element._c_node)
-## changeDocumentBelow(element, etree)
-
- return etree
+ return _elementTreeFactory(doc, element)
def HTML(text):
cdef _Document doc
From scoder at codespeak.net Thu May 18 21:07:47 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Thu, 18 May 2006 21:07:47 +0200 (CEST)
Subject: [Lxml-checkins] r27420 - lxml/trunk/src/lxml
Message-ID: <20060518190747.8FD3B1006E@code0.codespeak.net>
Author: scoder
Date: Thu May 18 21:07:46 2006
New Revision: 27420
Modified:
lxml/trunk/src/lxml/etree.pyx
Log:
whitespace
Modified: lxml/trunk/src/lxml/etree.pyx
==============================================================================
--- lxml/trunk/src/lxml/etree.pyx (original)
+++ lxml/trunk/src/lxml/etree.pyx Thu May 18 21:07:46 2006
@@ -1148,7 +1148,7 @@
return True
else:
return False
-
+
cdef _Attrib _attribFactory(_Document doc, xmlNode* c_node):
cdef _Attrib result
result = getProxy(c_node, PROXY_ATTRIB)
From scoder at codespeak.net Thu May 18 21:09:30 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Thu, 18 May 2006 21:09:30 +0200 (CEST)
Subject: [Lxml-checkins] r27421 - lxml/trunk/src/lxml
Message-ID: <20060518190930.16B291006E@code0.codespeak.net>
Author: scoder
Date: Thu May 18 21:09:29 2006
New Revision: 27421
Modified:
lxml/trunk/src/lxml/apihelpers.pxi
Log:
do not update C pointers of elements and attributes to the xmlDoc in changeDocumentBelow: already done by libxml2's xmlSetTreeDoc
Modified: lxml/trunk/src/lxml/apihelpers.pxi
==============================================================================
--- lxml/trunk/src/lxml/apihelpers.pxi (original)
+++ lxml/trunk/src/lxml/apihelpers.pxi Thu May 18 21:09:29 2006
@@ -401,6 +401,11 @@
tree.xmlReconciliateNs(doc._c_doc, node._c_node)
cdef void changeDocumentBelow(xmlNode* c_node, _Document doc):
+ """Update the Python references in the tree below the node.
+
+ Note that we expect C pointers to the document to be updated already by
+ libxml2.
+ """
cdef ProxyRef* ref
cdef xmlNode* c_current
cdef xmlAttr* c_attr_current
@@ -408,22 +413,14 @@
if c_node is NULL:
return
- # different _c_doc
- c_node.doc = doc._c_doc
# adjust all children
c_current = c_node.children
while c_current is not NULL:
changeDocumentBelow(c_current, doc)
c_current = c_current.next
-
- # adjust all attributes
- c_attr_current = c_node.properties
- while c_attr_current is not NULL:
- changeDocumentBelow(c_current, doc)
- c_attr_current = c_attr_current.next
- # adjust Python references last
+ # adjust Python references last (may trigger GC on _Document)
if c_node._private is not NULL:
ref = c_node._private
while ref is not NULL:
From scoder at codespeak.net Thu May 18 21:10:39 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Thu, 18 May 2006 21:10:39 +0200 (CEST)
Subject: [Lxml-checkins] r27422 - lxml/trunk
Message-ID: <20060518191039.B7C5D1006E@code0.codespeak.net>
Author: scoder
Date: Thu May 18 21:10:38 2006
New Revision: 27422
Modified:
lxml/trunk/bench.py
Log:
cleanup in get_attributes benchmark: do not set them, only read them
Modified: lxml/trunk/bench.py
==============================================================================
--- lxml/trunk/bench.py (original)
+++ lxml/trunk/bench.py Thu May 18 21:10:38 2006
@@ -361,9 +361,8 @@
@with_attributes(True)
def bench_get_attributes(self, root):
for child in root:
- child.set('a', 'bla')
- for child in root:
- child.get('a')
+ child.get('bla1')
+ child.get('{attr}test1')
def bench_setget_attributes(self, root):
for child in root:
From scoder at codespeak.net Thu May 18 21:25:19 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Thu, 18 May 2006 21:25:19 +0200 (CEST)
Subject: [Lxml-checkins] r27423 - lxml/trunk/src/lxml
Message-ID: <20060518192519.2DA2D1006E@code0.codespeak.net>
Author: scoder
Date: Thu May 18 21:25:18 2006
New Revision: 27423
Modified:
lxml/trunk/src/lxml/apihelpers.pxi
Log:
more cleanup in changeDocumentBelow to remove redundancy
Modified: lxml/trunk/src/lxml/apihelpers.pxi
==============================================================================
--- lxml/trunk/src/lxml/apihelpers.pxi (original)
+++ lxml/trunk/src/lxml/apihelpers.pxi Thu May 18 21:25:18 2006
@@ -408,22 +408,16 @@
"""
cdef ProxyRef* ref
cdef xmlNode* c_current
- cdef xmlAttr* c_attr_current
cdef _NodeBase proxy
-
- if c_node is NULL:
- return
-
- # adjust all children
+ # adjust all children recursively
c_current = c_node.children
while c_current is not NULL:
changeDocumentBelow(c_current, doc)
c_current = c_current.next
- # adjust Python references last (may trigger GC on _Document)
- if c_node._private is not NULL:
- ref = c_node._private
- while ref is not NULL:
- proxy = <_NodeBase>ref.proxy
- proxy._doc = doc
- ref = ref.next
+ # adjust Python references of current node
+ ref = c_node._private
+ while ref is not NULL:
+ proxy = <_NodeBase>ref.proxy
+ proxy._doc = doc
+ ref = ref.next
From scoder at codespeak.net Thu May 18 23:42:41 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Thu, 18 May 2006 23:42:41 +0200 (CEST)
Subject: [Lxml-checkins] r27425 - lxml/trunk/src/lxml
Message-ID: <20060518214241.7252C1006E@code0.codespeak.net>
Author: scoder
Date: Thu May 18 23:42:39 2006
New Revision: 27425
Modified:
lxml/trunk/src/lxml/parser.pxi
Log:
helper functions for copying xmlDoc's: central point to work around libxml2 URL bug and hand on parser dict
Modified: lxml/trunk/src/lxml/parser.pxi
==============================================================================
--- lxml/trunk/src/lxml/parser.pxi (original)
+++ lxml/trunk/src/lxml/parser.pxi Thu May 18 23:42:39 2006
@@ -623,6 +623,32 @@
__GLOBAL_PARSER_CONTEXT._initDocDict(result)
return result
+cdef xmlDoc* _copyDoc(xmlDoc* c_doc, int recursive):
+ cdef xmlDoc* result
+ result = tree.xmlCopyDoc(c_doc, recursive)
+ if c_doc.URL is not NULL:
+ # handle a bug in older libxml2 versions
+ if result.URL is not NULL:
+ tree.xmlFree(result.URL)
+ result.URL = tree.xmlStrdup(c_doc.URL)
+ __GLOBAL_PARSER_CONTEXT._initDocDict(result)
+ return result
+
+cdef xmlDoc* _copyDocRoot(xmlDoc* c_doc, xmlNode* c_new_root):
+ "Recursively copy the document and make c_new_root the new root node."
+ cdef xmlDoc* result
+ cdef xmlDoc* fake_c_doc
+ fake_c_doc = _fakeRootDoc(c_doc, c_new_root)
+ result = tree.xmlCopyDoc(fake_c_doc, 1)
+ _destroyFakeDoc(c_doc, fake_c_doc)
+ if c_doc.URL is not NULL:
+ # handle a bug in older libxml2 versions
+ if result.URL is not NULL:
+ tree.xmlFree(result.URL)
+ result.URL = tree.xmlStrdup(c_doc.URL)
+ __GLOBAL_PARSER_CONTEXT._initDocDict(result)
+ return result
+
############################################################
## API level helper functions for _Document creation
## (here we convert to UTF-8)
From scoder at codespeak.net Thu May 18 23:49:25 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Thu, 18 May 2006 23:49:25 +0200 (CEST)
Subject: [Lxml-checkins] r27426 - in lxml/trunk: . src/lxml src/lxml/tests
Message-ID: <20060518214925.ED4991006E@code0.codespeak.net>
Author: scoder
Date: Thu May 18 23:49:24 2006
New Revision: 27426
Modified:
lxml/trunk/CHANGES.txt
lxml/trunk/src/lxml/apihelpers.pxi
lxml/trunk/src/lxml/etree.pyx
lxml/trunk/src/lxml/tests/test_elementtree.py
lxml/trunk/src/lxml/xslt.pxi
Log:
fix memory deallocation crash introduced by new Element.__copy__ method, also clean up and fix copying documents by use of _copyDoc and _copyDocRoot helper functions
Modified: lxml/trunk/CHANGES.txt
==============================================================================
--- lxml/trunk/CHANGES.txt (original)
+++ lxml/trunk/CHANGES.txt Thu May 18 23:49:24 2006
@@ -7,9 +7,13 @@
Features added
--------------
+* Deep copying Elements and ElementTrees maintains the document information
+
Bugs fixed
----------
+* Memory deallocation crash resulting from deep copying elements
+
1.0.beta (2006-05-18)
=====================
Modified: lxml/trunk/src/lxml/apihelpers.pxi
==============================================================================
--- lxml/trunk/src/lxml/apihelpers.pxi (original)
+++ lxml/trunk/src/lxml/apihelpers.pxi Thu May 18 23:49:24 2006
@@ -53,7 +53,7 @@
# already the root node
return c_base_doc
- c_doc = tree.xmlCopyDoc(c_base_doc, 0) # non recursive!
+ c_doc = _copyDoc(c_base_doc, 0) # non recursive!
c_root = tree.xmlDocCopyNode(c_node, c_doc, 2) # non recursive!
c_root.children = c_node.children
Modified: lxml/trunk/src/lxml/etree.pyx
==============================================================================
--- lxml/trunk/src/lxml/etree.pyx (original)
+++ lxml/trunk/src/lxml/etree.pyx Thu May 18 23:49:24 2006
@@ -581,15 +581,13 @@
def __copy__(self):
cdef xmlNode* c_node
cdef xmlDoc* c_doc
- cdef xmlDoc* fake_c_doc
cdef _Document doc
+ cdef _Document new_doc
doc = self._doc
- fake_c_doc = _fakeRootDoc(doc._c_doc, self._c_node)
- c_doc = tree.xmlCopyDoc(fake_c_doc, 1) # recursive copy
- _destroyFakeDoc(doc._c_doc, fake_c_doc)
- doc = _documentFactory(c_doc, doc._parser)
- return doc.getroot()
-
+ c_doc = _copyDocRoot(doc._c_doc, self._c_node) # recursive
+ new_doc = _documentFactory(c_doc, doc._parser)
+ return new_doc.getroot()
+
def set(self, key, value):
self.attrib[key] = value
Modified: lxml/trunk/src/lxml/tests/test_elementtree.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_elementtree.py (original)
+++ lxml/trunk/src/lxml/tests/test_elementtree.py Thu May 18 23:49:24 2006
@@ -1804,6 +1804,14 @@
self.assertEquals('Foo', a.text)
# XXX ElementTree will share nodes, but lxml.etree won't..
+ def test_deepcopy_append(self):
+ # previously caused a crash
+ Element = self.etree.Element
+
+ a = Element('a')
+ b = copy.deepcopy(a)
+ b.append( Element('c') )
+
def test_element_boolean(self):
etree = self.etree
e = etree.Element('foo')
Modified: lxml/trunk/src/lxml/xslt.pxi
==============================================================================
--- lxml/trunk/src/lxml/xslt.pxi (original)
+++ lxml/trunk/src/lxml/xslt.pxi Thu May 18 23:49:24 2006
@@ -81,7 +81,7 @@
c_doc = xslt_resolver_context._c_style_doc
if c_doc is not NULL and c_doc.URL is not NULL:
if cstd.strcmp(c_uri, c_doc.URL) == 0:
- return tree.xmlCopyDoc(c_doc, 1)
+ return _copyDoc(c_doc, 1)
# call the Python document loaders
c_doc = NULL
@@ -236,23 +236,16 @@
self._access_control = access_control
# make a copy of the document as stylesheet parsing modifies it
- fake_c_doc = _fakeRootDoc(doc._c_doc, root_node._c_node)
- c_doc = tree.xmlCopyDoc(fake_c_doc, 1)
- _destroyFakeDoc(doc._c_doc, fake_c_doc)
+ c_doc = _copyDocRoot(doc._c_doc, root_node._c_node)
# make sure we always have a stylesheet URL
- if c_doc.URL is not NULL:
- # handle a bug in older libxml2 versions
- tree.xmlFree(c_doc.URL)
- if doc._c_doc.URL is not NULL:
- c_doc.URL = tree.xmlStrdup(doc._c_doc.URL)
- else:
+ if c_doc.URL is NULL:
doc_url_utf = "XSLT:__STRING__XSLT__%s" % id(self)
c_doc.URL = tree.xmlStrdup(_cstr(doc_url_utf))
self._xslt_resolver_context = _XSLTResolverContext(doc._parser)
# keep a copy in case we need to access the stylesheet via 'document()'
- self._xslt_resolver_context._c_style_doc = tree.xmlCopyDoc(c_doc, 1)
+ self._xslt_resolver_context._c_style_doc = _copyDoc(c_doc, 1)
c_doc._private = self._xslt_resolver_context
c_style = xslt.xsltParseStylesheetDoc(c_doc)
@@ -274,7 +267,7 @@
if self._xslt_resolver_context is not None and \
self._xslt_resolver_context._c_style_doc is not NULL:
tree.xmlFreeDoc(self._xslt_resolver_context._c_style_doc)
- # this cleans up copy of doc as well
+ # this cleans up the doc copy as well
xslt.xsltFreeStylesheet(self._c_style)
property error_log:
From scoder at codespeak.net Fri May 19 00:10:45 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Fri, 19 May 2006 00:10:45 +0200 (CEST)
Subject: [Lxml-checkins] r27429 - lxml/trunk
Message-ID: <20060518221045.A252A10063@code0.codespeak.net>
Author: scoder
Date: Fri May 19 00:10:43 2006
New Revision: 27429
Modified:
lxml/trunk/bench.py
Log:
fix running all tests in bench.py
Modified: lxml/trunk/bench.py
==============================================================================
--- lxml/trunk/bench.py (original)
+++ lxml/trunk/bench.py Fri May 19 00:10:43 2006
@@ -574,12 +574,12 @@
# sorted by name and tree tuple
benchmarks = [ sorted(b.benchmarks()) for b in benchmark_suites ]
- if len(sys.argv) > 1:
- selected = []
- for name in sys.argv[1:]:
- if not name.startswith('bench_'):
- name = 'bench_' + name
- selected.append(name)
+ selected = []
+ for name in sys.argv[1:]:
+ if not name.startswith('bench_'):
+ name = 'bench_' + name
+ selected.append(name)
+ if selected:
benchmarks = [ [ b for b in bs
if [ match for match in selected
if match in b[0] ] ]
From scoder at codespeak.net Fri May 19 00:15:14 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Fri, 19 May 2006 00:15:14 +0200 (CEST)
Subject: [Lxml-checkins] r27430 - lxml/trunk
Message-ID: <20060518221514.838D710063@code0.codespeak.net>
Author: scoder
Date: Fri May 19 00:15:12 2006
New Revision: 27430
Modified:
lxml/trunk/bench.py
Log:
2nd try: fix running all tests in bench.py
Modified: lxml/trunk/bench.py
==============================================================================
--- lxml/trunk/bench.py (original)
+++ lxml/trunk/bench.py Fri May 19 00:15:12 2006
@@ -555,13 +555,13 @@
except ImportError:
pass
- if '-a' in sys.argv:
+ try:
+ sys.argv.remove('-a')
# 'all' ?
- try:
- from elementtree import ElementTree as ET
- _etrees.append(ET)
- except ImportError:
- pass
+ from elementtree import ElementTree as ET
+ _etrees.append(ET)
+ except (ValueError, ImportError):
+ pass
if not _etrees:
print "No library to test. Exiting."
@@ -574,12 +574,11 @@
# sorted by name and tree tuple
benchmarks = [ sorted(b.benchmarks()) for b in benchmark_suites ]
- selected = []
- for name in sys.argv[1:]:
- if not name.startswith('bench_'):
- name = 'bench_' + name
- selected.append(name)
- if selected:
+ if len(sys.argv) > 1:
+ selected = []
+ for name in sys.argv[1:]:
+ selected.append(name)
+ print selected
benchmarks = [ [ b for b in bs
if [ match for match in selected
if match in b[0] ] ]
From scoder at codespeak.net Fri May 19 07:22:28 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Fri, 19 May 2006 07:22:28 +0200 (CEST)
Subject: [Lxml-checkins] r27436 - lxml/trunk/src/lxml/tests
Message-ID: <20060519052228.1BB6B1006E@code0.codespeak.net>
Author: scoder
Date: Fri May 19 07:22:26 2006
New Revision: 27436
Modified:
lxml/trunk/src/lxml/tests/test_elementtree.py
Log:
extended test case
Modified: lxml/trunk/src/lxml/tests/test_elementtree.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_elementtree.py (original)
+++ lxml/trunk/src/lxml/tests/test_elementtree.py Fri May 19 07:22:26 2006
@@ -1807,10 +1807,17 @@
def test_deepcopy_append(self):
# previously caused a crash
Element = self.etree.Element
+ tostring = self.etree.tostring
a = Element('a')
b = copy.deepcopy(a)
- b.append( Element('c') )
+ a.append( Element('C') )
+ b.append( Element('X') )
+
+ self.assertEquals('<a><C/></a>',
+ tostring(a).replace(' ', ''))
+ self.assertEquals('<a><X/></a>',
+ tostring(b).replace(' ', ''))
def test_element_boolean(self):
etree = self.etree
From scoder at codespeak.net Fri May 19 07:38:19 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Fri, 19 May 2006 07:38:19 +0200 (CEST)
Subject: [Lxml-checkins] r27437 - lxml/trunk
Message-ID: <20060519053819.ED2A91006E@code0.codespeak.net>
Author: scoder
Date: Fri May 19 07:38:18 2006
New Revision: 27437
Modified:
lxml/trunk/bench.py
Log:
allow benchmarks to actively skip a testrun by raising 'SkippedTest', also catch Exceptions raised in tests
Modified: lxml/trunk/bench.py
==============================================================================
--- lxml/trunk/bench.py (original)
+++ lxml/trunk/bench.py Fri May 19 07:38:18 2006
@@ -49,6 +49,8 @@
return function
return set_libs
+class SkippedTest(Exception):
+ pass
class BenchMarkBase(object):
atoz = string.ascii_lowercase
@@ -408,14 +410,17 @@
for i in repeat:
child.text
+ @onlylib('lxe')
def bench_index(self, root):
for child in root:
root.index(child)
+ @onlylib('lxe')
def bench_index_slice(self, root):
for child in root[5:100]:
root.index(child, 5, 100)
+ @onlylib('lxe')
def bench_index_slice_neg(self, root):
for child in root[-100:-5]:
root.index(child, start=-100, stop=-5)
@@ -647,12 +652,17 @@
print "(%-10s)" % tree_set_name,
sys.stdout.flush()
- result = run_bench(bench, *benchmark_setup)
-
- print "%9.4f msec/pass, best of (" % min(result),
- for t in result:
- print "%9.4f" % t,
- print ")"
+ try:
+ result = run_bench(bench, *benchmark_setup)
+ except SkippedTest:
+ print "skipped"
+ except Exception, e:
+ print "failed: %s: %s" % (e.__class__.__name__, e)
+ else:
+ print "%9.4f msec/pass, best of (" % min(result),
+ for t in result:
+ print "%9.4f" % t,
+ print ")"
if len(benchmark_suites) > 1:
print # empty line between different benchmarks
From scoder at codespeak.net Fri May 19 07:56:27 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Fri, 19 May 2006 07:56:27 +0200 (CEST)
Subject: [Lxml-checkins] r27438 - lxml/trunk
Message-ID: <20060519055627.CAB3F1006E@code0.codespeak.net>
Author: scoder
Date: Fri May 19 07:56:26 2006
New Revision: 27438
Modified:
lxml/trunk/bench.py
Log:
fix test comparison in bench.py if tests are skipped via 'onlylib': previously could end up showing unrelated benchmarks next to each other and stopping before finishing all tests
Modified: lxml/trunk/bench.py
==============================================================================
--- lxml/trunk/bench.py (original)
+++ lxml/trunk/bench.py Fri May 19 07:56:26 2006
@@ -203,8 +203,9 @@
continue
method = getattr(self, name)
if hasattr(method, 'LIBS') and self.lib_name not in method.LIBS:
- benchmarks.append((name, None, (), 0, 0))
- continue
+ method_call = None
+ else:
+ method_call = method
if method.__doc__:
tree_sets = method.__doc__.split()
else:
@@ -223,7 +224,7 @@
for tree_tuple in tree_tuples:
for tn in sorted(getattr(method, 'TEXT', (0,))):
for an in sorted(getattr(method, 'ATTRIBUTES', (0,))):
- benchmarks.append((name, method, tree_tuple, tn, an))
+ benchmarks.append((name, method_call, tree_tuple, tn, an))
return benchmarks
@@ -591,6 +592,9 @@
import time
def run_bench(suite, method_name, method_call, tree_set, tn, an):
+ if method_call is None:
+ raise SkippedTest
+
current_time = time.time
call_repeat = range(10)
@@ -642,13 +646,9 @@
for bench_calls in izip(*benchmarks):
for lib, (bench, benchmark_setup) in enumerate(izip(benchmark_suites, bench_calls)):
- bench_name, method_call = benchmark_setup[:2]
+ bench_name = benchmark_setup[0]
tree_set_name = build_treeset_name(*benchmark_setup[-3:])
print "%-3s: %-28s" % (bench.lib_name, bench_name[6:34]),
- if method_call is None:
- print "skipped"
- continue
-
print "(%-10s)" % tree_set_name,
sys.stdout.flush()
From scoder at codespeak.net Fri May 19 08:17:18 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Fri, 19 May 2006 08:17:18 +0200 (CEST)
Subject: [Lxml-checkins] r27439 - lxml/trunk
Message-ID: <20060519061718.8A5601006E@code0.codespeak.net>
Author: scoder
Date: Fri May 19 08:17:17 2006
New Revision: 27439
Modified:
lxml/trunk/bench.py
Log:
fix: add text to children of tree roots, previous benchmarks did not actually use it
Modified: lxml/trunk/bench.py
==============================================================================
--- lxml/trunk/bench.py (original)
+++ lxml/trunk/bench.py Fri May 19 08:17:17 2006
@@ -4,8 +4,8 @@
TREE_FACTOR = 1 # increase tree size with '-l / '-L' cmd option
-_TEXT = "some ASCII text" * 10 * TREE_FACTOR
-_UTEXT = u"some klingon: \F8D2" * 10 * TREE_FACTOR
+_TEXT = "some ASCII text" * TREE_FACTOR
+_UTEXT = u"some klingon: \F8D2" * TREE_FACTOR
_ATTRIBUTES = {
'{attr}test1' : _UTEXT,
'{attr}test2' : _UTEXT,
@@ -140,6 +140,7 @@
root = self.etree.Element('{abc}rootnode')
for ch1 in atoz:
el = SubElement(root, "{bcd}"+ch1*5, attributes)
+ el.text = text
for ch2 in atoz:
for i in range(20 * TREE_FACTOR):
SubElement(el, "{cdefg}%s%05d" % (ch2, i))
@@ -156,6 +157,7 @@
for ch1 in atoz:
for i in range(20 * TREE_FACTOR):
el = SubElement(root, "{bcd}"+ch1*5, attributes)
+ el.text = text
for ch2 in atoz:
SubElement(el, "{cdefg}%s%05d" % (ch2, i))
t = current_time() - t
@@ -172,6 +174,8 @@
tag_no = count().next
children = [ SubElement(c, "{bcd}a%05d" % i, attributes)
for i,c in enumerate(chain(children, children, children)) ]
+ for child in root:
+ child.text = text
t = current_time() - t
return (root, t)
@@ -185,6 +189,7 @@
children = [root]
for ch1 in atoz:
el = SubElement(root, "{bcd}"+ch1*5, attributes)
+ el.text = text
SubElement(el, "{cdefg}abcde", attributes)
SubElement(el, "{cdefg}bcdef", attributes)
t = current_time() - t
From scoder at codespeak.net Fri May 19 08:40:11 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Fri, 19 May 2006 08:40:11 +0200 (CEST)
Subject: [Lxml-checkins] r27440 - lxml/trunk
Message-ID: <20060519064011.B5CE610063@code0.codespeak.net>
Author: scoder
Date: Fri May 19 08:40:10 2006
New Revision: 27440
Modified:
lxml/trunk/bench.py
Log:
cleanup in bench.py
Modified: lxml/trunk/bench.py
==============================================================================
--- lxml/trunk/bench.py (original)
+++ lxml/trunk/bench.py Fri May 19 08:40:10 2006
@@ -72,7 +72,7 @@
setattr(self, fname, lambda : deepcopy(root))
else:
def set_property(root, fname):
- setattr(self, fname, self.et_make_factory(root))
+ setattr(self, fname, self.et_make_clone_factory(root))
attribute_list = list(izip(count(), ({}, _ATTRIBUTES)))
text_list = list(izip(count(), (None, _TEXT, _UTEXT)))
@@ -95,7 +95,7 @@
def tree_builder(self, tree, tn, an):
return getattr(self, self._tree_builder_name(tree, tn, an))
- def et_make_factory(self, elem):
+ def et_make_clone_factory(self, elem):
def generate_elem(append, elem, level):
var = "e" + str(level)
arg = repr(elem.tag)
@@ -343,7 +343,6 @@
child.append(el)
def bench_makeelement(self, root):
- Element = self.etree.Element
empty_attrib = {}
for child in root:
child.makeelement('{test}test', empty_attrib)
@@ -567,8 +566,8 @@
pass
try:
- sys.argv.remove('-a')
# 'all' ?
+ sys.argv.remove('-a')
from elementtree import ElementTree as ET
_etrees.append(ET)
except (ValueError, ImportError):
From scoder at codespeak.net Fri May 19 08:43:34 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Fri, 19 May 2006 08:43:34 +0200 (CEST)
Subject: [Lxml-checkins] r27441 - lxml/trunk
Message-ID: <20060519064334.ED59F10063@code0.codespeak.net>
Author: scoder
Date: Fri May 19 08:43:34 2006
New Revision: 27441
Modified:
lxml/trunk/bench.py
Log:
do not use unicode in attribute values
Modified: lxml/trunk/bench.py
==============================================================================
--- lxml/trunk/bench.py (original)
+++ lxml/trunk/bench.py Fri May 19 08:43:34 2006
@@ -7,8 +7,8 @@
_TEXT = "some ASCII text" * TREE_FACTOR
_UTEXT = u"some klingon: \F8D2" * TREE_FACTOR
_ATTRIBUTES = {
- '{attr}test1' : _UTEXT,
- '{attr}test2' : _UTEXT,
+ '{attr}test1' : _TEXT,
+ '{attr}test2' : _TEXT,
'bla1' : _TEXT,
'bla2' : _TEXT,
'bla3' : _TEXT
From scoder at codespeak.net Fri May 19 10:44:22 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Fri, 19 May 2006 10:44:22 +0200 (CEST)
Subject: [Lxml-checkins] r27448 - lxml/trunk
Message-ID: <20060519084422.0FDE110063@code0.codespeak.net>
Author: scoder
Date: Fri May 19 10:44:21 2006
New Revision: 27448
Modified:
lxml/trunk/bench.py
Log:
fix getiterator benchmarks to actually find the searched elements, new benchmarks for findall and replacing children within same document
Modified: lxml/trunk/bench.py
==============================================================================
--- lxml/trunk/bench.py (original)
+++ lxml/trunk/bench.py Fri May 19 10:44:21 2006
@@ -61,6 +61,8 @@
'cElementTree' : 'cET'
}
+ SEARCH_TAG = "{cdefg}00001"
+
def __init__(self, etree):
self.etree = etree
libname = etree.__name__.split('.')[-1]
@@ -172,7 +174,7 @@
children = [root]
for i in range(6 + TREE_FACTOR):
tag_no = count().next
- children = [ SubElement(c, "{bcd}a%05d" % i, attributes)
+ children = [ SubElement(c, "{cdefg}a%05d" % i, attributes)
for i,c in enumerate(chain(children, children, children)) ]
for child in root:
child.text = text
@@ -190,8 +192,8 @@
for ch1 in atoz:
el = SubElement(root, "{bcd}"+ch1*5, attributes)
el.text = text
- SubElement(el, "{cdefg}abcde", attributes)
- SubElement(el, "{cdefg}bcdef", attributes)
+ SubElement(el, "{cdefg}00001", attributes)
+ SubElement(el, "{cdefg}00002", attributes)
t = current_time() - t
return (root, t)
@@ -347,12 +349,17 @@
for child in root:
child.makeelement('{test}test', empty_attrib)
- def bench_replace_children(self, root):
+ def bench_replace_children_element(self, root):
Element = self.etree.Element
for child in root:
el = Element('{test}test')
child[:] = [el]
+ def bench_replace_children(self, root):
+ Element = self.etree.Element
+ for child in root:
+ child[:] = [ child[0] ]
+
def bench_remove_children(self, root):
for child in root:
root.remove(child)
@@ -430,14 +437,20 @@
for child in root[-100:-5]:
root.index(child, start=-100, stop=-5)
- def bench_getiterator(self, root):
+ def bench_getiterator_all(self, root):
+ list(root.getiterator())
+
+ def bench_getiterator_islice(self, root):
list(islice(root.getiterator(), 10, 110))
def bench_getiterator_tag(self, root):
- list(islice(root.getiterator("{b}a"), 3, 10))
+ list(islice(root.getiterator(self.SEARCH_TAG), 3, 10))
def bench_getiterator_tag_all(self, root):
- list(root.getiterator("{b}a"))
+ list(root.getiterator(self.SEARCH_TAG))
+
+ def bench_findall(self, root):
+ root.findall(".//" + self.SEARCH_TAG)
@onlylib('lxe')
def bench_xpath_class(self, root):
From scoder at codespeak.net Fri May 19 11:35:37 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Fri, 19 May 2006 11:35:37 +0200 (CEST)
Subject: [Lxml-checkins] r27449 - lxml/trunk
Message-ID: <20060519093537.A055610061@code0.codespeak.net>
Author: scoder
Date: Fri May 19 11:35:36 2006
New Revision: 27449
Modified:
lxml/trunk/bench.py
Log:
xpath and findall benchmarks
Modified: lxml/trunk/bench.py
==============================================================================
--- lxml/trunk/bench.py (original)
+++ lxml/trunk/bench.py Fri May 19 11:35:36 2006
@@ -450,6 +450,9 @@
list(root.getiterator(self.SEARCH_TAG))
def bench_findall(self, root):
+ root.findall(".//*")
+
+ def bench_findall_tag(self, root):
root.findall(".//" + self.SEARCH_TAG)
@onlylib('lxe')
@@ -459,12 +462,23 @@
xpath(child)
@onlylib('lxe')
+ def bench_xpath_class_repeat(self, root):
+ for child in root:
+ xpath = self.etree.XPath("./*[0]")
+ xpath(child)
+
+ @onlylib('lxe')
def bench_xpath_element(self, root):
+ xpath = self.etree.XPathElementEvaluator(root)
for child in root:
- xpath = self.etree.XPathElementEvaluator(child)
xpath.evaluate("./*[0]")
@onlylib('lxe')
+ def bench_xpath_method(self, root):
+ for child in root:
+ child.xpath("./*[0]")
+
+ @onlylib('lxe')
def bench_xpath_extensions_old(self, root):
def return_child(_, element):
if element:
From scoder at codespeak.net Fri May 19 11:54:57 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Fri, 19 May 2006 11:54:57 +0200 (CEST)
Subject: [Lxml-checkins] r27453 - lxml/trunk/doc
Message-ID: <20060519095457.D1A7610061@code0.codespeak.net>
Author: scoder
Date: Fri May 19 11:54:56 2006
New Revision: 27453
Added:
lxml/trunk/doc/performance.txt
Modified:
lxml/trunk/doc/main.txt
Log:
new doc/performance.txt to compare lxml with ET and cET
Modified: lxml/trunk/doc/main.txt
==============================================================================
--- lxml/trunk/doc/main.txt (original)
+++ lxml/trunk/doc/main.txt Fri May 19 11:54:56 2006
@@ -75,9 +75,10 @@
Documentation
-------------
-lxml follows the `ElementTree API`_ as much as possible, building it
-on top of the native libxml2 tree. See also the `ElementTree
-compatibility overview`_.
+lxml follows the ElementTree_ API as much as possible, building it on top of
+the native libxml2 tree. See also the `ElementTree compatibility overview`_
+and the `benchmark results`_ comparing lxml to the original ElementTree_ and
+cElementTree_ implementations.
lxml also `extends this API`_ to expose libxml2 and libxslt specific
functionality, such as XPath_, `Relax NG`_, `XML Schema`_, `XSLT`_, and
@@ -91,17 +92,21 @@
lxml also offers a `SAX compliant API`_, that works with the SAX support
in the standard library.
-.. _`ElementTree API`: http://effbot.org/zone/element-index.htm
+.. _ElementTree: http://effbot.org/zone/element-index.htm
+.. _cElementTree: http://effbot.org/zone/celementtree.htm
+
+.. _`benchmark results`: performance.html
.. _`ElementTree compatibility overview`: compatibility.html
.. _`extends this API`: api.html
.. _`extension functions`: extensions.html
+.. _`implementing namespaces`: namespace_extensions.html
+.. _`SAX compliant API`: sax.html
+
.. _XPath: http://www.w3.org/TR/xpath
.. _`Relax NG`: http://www.relaxng.org/
.. _`XML Schema`: http://www.w3.org/XML/Schema
.. _`XSLT`: http://www.w3.org/TR/xslt
.. _`c14n`: http://www.w3.org/TR/xml-c14n
-.. _`implementing namespaces`: namespace_extensions.html
-.. _`SAX compliant API`: sax.html
Mailing list
------------
Added: lxml/trunk/doc/performance.txt
==============================================================================
--- (empty file)
+++ lxml/trunk/doc/performance.txt Fri May 19 11:54:56 2006
@@ -0,0 +1,245 @@
+Benchmarks and speed
+====================
+
+As an XML library, lxml.etree is very fast. It is also slow. It depends on
+what you do with it. This text describes where lxml.etree (lxe) excels, gives
+hints on some performance traps and compares the overall performance to the
+original ElementTree_ (ET) and cElementTree_ (cET) libraries by Fredrik Lundh.
+The cElementTree library is a fast C-implementation of the original
+ElementTree.
+
+The statements made here are backed by the benchmark script `bench.py`_ that
+comes with the lxml source distribution. The numbers cited below compare lxml
+1.0, ElementTree 1.2.6 and cElementTree 1.0.5.
+
+.. _ElementTree: http://effbot.org/zone/element-index.htm
+.. _cElementTree: http://effbot.org/zone/celementtree.htm
+.. _`bench.py`: http://codespeak.net/svn/lxml/trunk/bench.py
+
+The ``bench.py`` script runs a number of simple tests on the different
+libraries, using different XML tree configurations: different tree sizes, with
+or without attributes (-/A) and with or without ASCII or unicode text (-/S/U).
+In the result extracts cited below, T1 refers to a 3-level tree with many
+children at the third level, T2 is swapped around to have many children at the
+root element, T3 is a deep tree with few children at each level and T4 is a
+small tree, slightly broader than deep.
+
+
+Bad things first
+----------------
+
+First thing to say: there *is* an overhead involved in having a C library
+mimic the ElementTree API. As opposed to ElementTree, lxml has to generate
+Python objects on the fly when asked for them. What this means is: the more
+of your code runs in Python, the slower your application gets. Note, however,
+that this is true for most performance critical Python applications.
+
+
+Parsing and Serialising
+-----------------------
+
+This is one of the areas where lxml excels. The reason is that both parts are
+executed entirely at the C level, without major interaction with Python code.
+The results are rather impressive. Compared to cElementTree, lxml is about 20
+to 40 times faster on serialisation::
+
+ lxe: tostring_utf16 (SA T2) 30.9846 msec/pass
+ cET: tostring_utf16 (SA T2) 715.5002 msec/pass
+ ET : tostring_utf16 (SA T2) 758.5271 msec/pass
+
+ lxe: tostring_utf16 (U- T3) 3.0509 msec/pass
+ cET: tostring_utf16 (U- T3) 72.4721 msec/pass
+ ET : tostring_utf16 (U- T3) 87.0735 msec/pass
+
+ lxe: tostring_utf8 (UA T2) 26.8996 msec/pass
+ cET: tostring_utf8 (UA T2) 700.4889 msec/pass
+ ET : tostring_utf8 (UA T2) 745.3317 msec/pass
+
+ lxe: tostring_utf8 (S- T3) 2.1876 msec/pass
+ cET: tostring_utf8 (S- T3) 71.1290 msec/pass
+ ET : tostring_utf8 (S- T3) 87.1525 msec/pass
+
+For parsing, the difference between the libraries is smaller. The (c)ET
+libraries use the expat parser, which is known to be fast and similar in
+performance to the libxml2 parser. If you take a complete serialize-parse
+cycle, the numbers will look like this::
+
+ lxe: write_utf8_parse_stringIO (S- T1) 187.0444 msec/pass
+ cET: write_utf8_parse_stringIO (S- T1) 828.4068 msec/pass
+ ET : write_utf8_parse_stringIO (S- T1) 1181.0658 msec/pass
+
+ lxe: write_utf8_parse_stringIO (UA T2) 213.6599 msec/pass
+ cET: write_utf8_parse_stringIO (UA T2) 927.2374 msec/pass
+ ET : write_utf8_parse_stringIO (UA T2) 1297.9678 msec/pass
+
+So, lxml also wins this contest, but considering the previous numbers on
+serialization, parser performance is otherwise roughly comparable between cET
+and lxml.
+
+
+The ElementTree API
+-------------------
+
+Since all three libraries implement the same API, their performance is easy to
+compare in this area. A major disadvantage for lxml is the different tree
+model that underlies libxml2. It allows lxml to provide parent pointers for
+elements, but also increases the overhead of tree restructuring. This can be
+seen from the tree setup times of the benchmark::
+
+ Setup times for trees in seconds:
+ lxe: -- S- U- -A SA UA
+ T1: 0.1658 0.1236 0.1241 0.1243 0.1261 0.1254
+ T2: 0.1281 0.1282 0.1299 0.1381 0.1389 0.1395
+ T3: 0.0366 0.0300 0.0290 0.0850 0.0851 0.0893
+ T4: 0.0010 0.0006 0.0006 0.0018 0.0018 0.0019
+ cET: -- S- U- -A SA UA
+ T1: 0.0417 0.0409 0.0403 0.0410 0.0410 0.0415
+ T2: 0.0413 0.0414 0.0413 0.0417 0.0411 0.0417
+ T3: 0.0097 0.0100 0.0099 0.0187 0.0142 0.0146
+ T4: 0.0001 0.0001 0.0001 0.0001 0.0001 0.0001
+ ET : -- S- U- -A SA UA
+ T1: 0.2189 0.2832 0.2210 0.2646 0.2905 0.2214
+ T2: 0.3022 0.2322 0.2868 0.3192 0.2290 0.3075
+ T3: 0.0519 0.0553 0.0527 0.0601 0.0572 0.0911
+ T4: 0.0009 0.0008 0.0008 0.0008 0.0009 0.0009
+
+While lxml is still faster than ET in most cases (30-60%), cET can be three to
+four times as fast as lxml here. So, if the main performance bottleneck of an
+application is creating large XML trees in memory through calls to Element and
+SubElement, cET is the best choice. Note, however, that the serialisation
+performance may even out this advantage.
+
+A critical action for lxml is moving elements between document contexts. It
+requires lxml to do recursive adaptations throughout the moved tree structure.
+
+The following benchmark appends all root children of the second tree to the
+root of the first tree::
+
+ lxe: append_from_document (-- T1,T2) 11.7905 msec/pass
+ cET: append_from_document (-- T1,T2) 0.4673 msec/pass
+ ET : append_from_document (-- T1,T2) 2.0460 msec/pass
+
+ lxe: append_from_document (-- T3,T4) 0.2017 msec/pass
+ cET: append_from_document (-- T3,T4) 0.0227 msec/pass
+ ET : append_from_document (-- T3,T4) 0.1563 msec/pass
+
+Although these are fairly small numbers compared to parsing, this easily shows
+the different performance classes for lxml and (c)ET. Where the latter do not
+have to care about parent pointers and tree structures, lxml has to deep
+traverse the appended tree. The performance difference therefore increases
+with the size of the tree that is moved.
+
+This difference is not always as visible, but applies to most parts of the
+API, like inserting newly created elements::
+
+ lxe: insert_from_document (-- T1,T2) 16.4772 msec/pass
+ cET: insert_from_document (-- T1,T2) 1.1874 msec/pass
+ ET : insert_from_document (-- T1,T2) 3.5447 msec/pass
+
+Or replacing the child slice by a new element::
+
+ lxe: replace_children_element (-- T1 ) 9.1834 msec/pass
+ cET: replace_children_element (-- T1 ) 0.9731 msec/pass
+ ET : replace_children_element (-- T1 ) 14.8213 msec/pass
+
+You should keep this difference in mind when you merge very large trees. On
+the other hand, deep copying a tree is fast in lxml::
+
+ lxe: deepcopy (-- T1 ) 24.7359 msec/pass
+ cET: deepcopy (-- T1 ) 450.5479 msec/pass
+ ET : deepcopy (-- T1 ) 717.8308 msec/pass
+
+ lxe: deepcopy (-- T3 ) 2.1182 msec/pass
+ cET: deepcopy (-- T3 ) 107.2124 msec/pass
+ ET : deepcopy (-- T3 ) 173.9782 msec/pass
+
+So, if you often need to create independent subtrees from a large tree that
+you have parsed in, lxml is the best choice here.
+
+
+Tree traversal
+--------------
+
+Another area where lxml is very fast is iteration for tree traversal. If your
+algorithms can benefit from step-by-step traversal of the XML tree and
+especially if few elements are of interest, lxml is a good choice::
+
+ lxe: getiterator_all (-- T2 ) 32.3100 msec/pass
+ cET: getiterator_all (-- T2 ) 37.2489 msec/pass
+ ET : getiterator_all (-- T2 ) 46.2996 msec/pass
+
+ lxe: getiterator_islice (-- T2 ) 3.3567 msec/pass
+ cET: getiterator_islice (-- T2 ) 0.3289 msec/pass
+ ET : getiterator_islice (-- T2 ) 43.9938 msec/pass
+
+ lxe: getiterator_tag (-- T2 ) 4.7438 msec/pass
+ cET: getiterator_tag (-- T2 ) 31.8628 msec/pass
+ ET : getiterator_tag (-- T2 ) 36.4583 msec/pass
+
+ lxe: getiterator_tag_all (-- T2 ) 4.6267 msec/pass
+ cET: getiterator_tag_all (-- T2 ) 32.1669 msec/pass
+ ET : getiterator_tag_all (-- T2 ) 36.3365 msec/pass
+
+This similarly shows in ``Element.findall()``::
+
+ lxe: findall (-- T2 ) 36.4730 msec/pass
+ cET: findall (-- T2 ) 38.8718 msec/pass
+ ET : findall (-- T2 ) 50.9692 msec/pass
+
+ lxe: findall (-- T3 ) 4.3956 msec/pass
+ cET: findall (-- T3 ) 11.8051 msec/pass
+ ET : findall (-- T3 ) 11.2570 msec/pass
+
+ lxe: findall_tag (-- T2 ) 4.3950 msec/pass
+ cET: findall_tag (-- T2 ) 31.3107 msec/pass
+ ET : findall_tag (-- T2 ) 36.7813 msec/pass
+
+ lxe: findall_tag (-- T3 ) 0.5946 msec/pass
+ cET: findall_tag (-- T3 ) 7.4491 msec/pass
+ ET : findall_tag (-- T3 ) 9.2943 msec/pass
+
+Note that all three libraries currently use the same Python implementation for
+``findall()``, except for their native tree iterator.
+
+
+XPath
+-----
+
+This part of lxml does not have an equivalent in ElementTree. However, lxml
+provides more than one way of accessing it and you should take care which part
+of the lxml API you use. The most straightforward way is to call the
+``xpath()`` method on an Element or ElementTree::
+
+ lxe: xpath_method (-- T1) 9.9304 msec/pass
+ lxe: xpath_method (-- T2) 29.3595 msec/pass
+ lxe: xpath_method (-- T3) 0.2791 msec/pass
+ lxe: xpath_method (-- T4) 0.9906 msec/pass
+
+This is well suited for testing and when the XPath expressions are as diverse
+as the trees they are called on. However, if you have a single XPath
+expression that you want to apply to a larger number of different elements,
+the ``XPath`` class is the most efficient way to do it::
+
+ lxe: xpath_class (-- T1) 4.7921 msec/pass
+ lxe: xpath_class (-- T2) 9.6187 msec/pass
+ lxe: xpath_class (-- T3) 0.2215 msec/pass
+ lxe: xpath_class (-- T4) 0.2697 msec/pass
+
+Note that this still allows you to use variables in the expression, so you can
+parse it once and then adapt it through variables at call time. In other
+cases, where you have a fixed Element or ElementTree and want to run different
+expressions on it, you should consider the ``XPathEvaluator``::
+
+ lxe: xpath_element (-- T1) 5.3826 msec/pass
+ lxe: xpath_element (-- T2) 11.3929 msec/pass
+ lxe: xpath_element (-- T3) 0.2514 msec/pass
+ lxe: xpath_element (-- T4) 0.3038 msec/pass
+
+While it looks slightly slower, creating an XPath object for each of the
+expressions generates a much higher overhead here::
+
+ lxe: xpath_class_repeat (-- T1) 6.8099 msec/pass
+ lxe: xpath_class_repeat (-- T2) 26.7462 msec/pass
+ lxe: xpath_class_repeat (-- T3) 0.3126 msec/pass
+ lxe: xpath_class_repeat (-- T4) 1.1111 msec/pass
+
From scoder at codespeak.net Fri May 19 12:01:38 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Fri, 19 May 2006 12:01:38 +0200 (CEST)
Subject: [Lxml-checkins] r27454 - lxml/trunk/doc
Message-ID: <20060519100138.C957E10063@code0.codespeak.net>
Author: scoder
Date: Fri May 19 12:01:37 2006
New Revision: 27454
Modified:
lxml/trunk/doc/performance.txt
Log:
small clarifications
Modified: lxml/trunk/doc/performance.txt
==============================================================================
--- lxml/trunk/doc/performance.txt (original)
+++ lxml/trunk/doc/performance.txt Fri May 19 12:01:37 2006
@@ -62,7 +62,7 @@
For parsing, the difference between the libraries is smaller. The (c)ET
libraries use the expat parser, which is known to be fast and similar in
performance to the libxml2 parser. If you take a complete serialize-parse
-cycle, the numbers will look like this::
+cycle, the numbers will look similar to these::
lxe: write_utf8_parse_stringIO (S- T1) 187.0444 msec/pass
cET: write_utf8_parse_stringIO (S- T1) 828.4068 msec/pass
@@ -153,8 +153,8 @@
cET: deepcopy (-- T3 ) 107.2124 msec/pass
ET : deepcopy (-- T3 ) 173.9782 msec/pass
-So, if you often need to create independent subtrees from a large tree that
-you have parsed in, lxml is the best choice here.
+So, for example, if you often need to create independent subtrees from a large
+tree that you have parsed in, lxml is by far the best choice here.
Tree traversal
From scoder at codespeak.net Fri May 19 12:05:58 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Fri, 19 May 2006 12:05:58 +0200 (CEST)
Subject: [Lxml-checkins] r27455 - lxml/trunk/doc
Message-ID: <20060519100558.DCCFD1006E@code0.codespeak.net>
Author: scoder
Date: Fri May 19 12:05:57 2006
New Revision: 27455
Modified:
lxml/trunk/doc/performance.txt
Log:
readability
Modified: lxml/trunk/doc/performance.txt
==============================================================================
--- lxml/trunk/doc/performance.txt (original)
+++ lxml/trunk/doc/performance.txt Fri May 19 12:05:57 2006
@@ -164,39 +164,39 @@
algorithms can benefit from step-by-step traversal of the XML tree and
especially if few elements are of interest, lxml is a good choice::
- lxe: getiterator_all (-- T2 ) 32.3100 msec/pass
- cET: getiterator_all (-- T2 ) 37.2489 msec/pass
- ET : getiterator_all (-- T2 ) 46.2996 msec/pass
-
- lxe: getiterator_islice (-- T2 ) 3.3567 msec/pass
- cET: getiterator_islice (-- T2 ) 0.3289 msec/pass
- ET : getiterator_islice (-- T2 ) 43.9938 msec/pass
-
- lxe: getiterator_tag (-- T2 ) 4.7438 msec/pass
- cET: getiterator_tag (-- T2 ) 31.8628 msec/pass
- ET : getiterator_tag (-- T2 ) 36.4583 msec/pass
-
- lxe: getiterator_tag_all (-- T2 ) 4.6267 msec/pass
- cET: getiterator_tag_all (-- T2 ) 32.1669 msec/pass
- ET : getiterator_tag_all (-- T2 ) 36.3365 msec/pass
+ lxe: getiterator_all (-- T2 ) 32.3100 msec/pass
+ cET: getiterator_all (-- T2 ) 37.2489 msec/pass
+ ET : getiterator_all (-- T2 ) 46.2996 msec/pass
+
+ lxe: getiterator_islice (-- T2 ) 3.3567 msec/pass
+ cET: getiterator_islice (-- T2 ) 0.3289 msec/pass
+ ET : getiterator_islice (-- T2 ) 43.9938 msec/pass
+
+ lxe: getiterator_tag (-- T2 ) 4.7438 msec/pass
+ cET: getiterator_tag (-- T2 ) 31.8628 msec/pass
+ ET : getiterator_tag (-- T2 ) 36.4583 msec/pass
+
+ lxe: getiterator_tag_all (-- T2 ) 4.6267 msec/pass
+ cET: getiterator_tag_all (-- T2 ) 32.1669 msec/pass
+ ET : getiterator_tag_all (-- T2 ) 36.3365 msec/pass
This similarly shows in ``Element.findall()``::
- lxe: findall (-- T2 ) 36.4730 msec/pass
- cET: findall (-- T2 ) 38.8718 msec/pass
- ET : findall (-- T2 ) 50.9692 msec/pass
-
- lxe: findall (-- T3 ) 4.3956 msec/pass
- cET: findall (-- T3 ) 11.8051 msec/pass
- ET : findall (-- T3 ) 11.2570 msec/pass
-
- lxe: findall_tag (-- T2 ) 4.3950 msec/pass
- cET: findall_tag (-- T2 ) 31.3107 msec/pass
- ET : findall_tag (-- T2 ) 36.7813 msec/pass
-
- lxe: findall_tag (-- T3 ) 0.5946 msec/pass
- cET: findall_tag (-- T3 ) 7.4491 msec/pass
- ET : findall_tag (-- T3 ) 9.2943 msec/pass
+ lxe: findall (-- T2 ) 36.4730 msec/pass
+ cET: findall (-- T2 ) 38.8718 msec/pass
+ ET : findall (-- T2 ) 50.9692 msec/pass
+
+ lxe: findall (-- T3 ) 4.3956 msec/pass
+ cET: findall (-- T3 ) 11.8051 msec/pass
+ ET : findall (-- T3 ) 11.2570 msec/pass
+
+ lxe: findall_tag (-- T2 ) 4.3950 msec/pass
+ cET: findall_tag (-- T2 ) 31.3107 msec/pass
+ ET : findall_tag (-- T2 ) 36.7813 msec/pass
+
+ lxe: findall_tag (-- T3 ) 0.5946 msec/pass
+ cET: findall_tag (-- T3 ) 7.4491 msec/pass
+ ET : findall_tag (-- T3 ) 9.2943 msec/pass
Note that all three libraries currently use the same Python implementation for
``findall()``, except for their native tree iterator.
From scoder at codespeak.net Fri May 19 12:08:12 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Fri, 19 May 2006 12:08:12 +0200 (CEST)
Subject: [Lxml-checkins] r27456 - lxml/trunk/doc
Message-ID: <20060519100812.2DA141006E@code0.codespeak.net>
Author: scoder
Date: Fri May 19 12:08:11 2006
New Revision: 27456
Modified:
lxml/trunk/doc/performance.txt
Log:
readability
Modified: lxml/trunk/doc/performance.txt
==============================================================================
--- lxml/trunk/doc/performance.txt (original)
+++ lxml/trunk/doc/performance.txt Fri May 19 12:08:11 2006
@@ -8,14 +8,14 @@
The cElementTree library is a fast C-implementation of the original
ElementTree.
-The statements made here are backed by the benchmark script `bench.py`_ that
-comes with the lxml source distribution. The numbers cited below compare lxml
-1.0, ElementTree 1.2.6 and cElementTree 1.0.5.
-
.. _ElementTree: http://effbot.org/zone/element-index.htm
.. _cElementTree: http://effbot.org/zone/celementtree.htm
.. _`bench.py`: http://codespeak.net/svn/lxml/trunk/bench.py
+The statements made here are backed by the benchmark script `bench.py`_ that
+comes with the lxml source distribution. The numbers cited below compare lxml
+1.0, ElementTree 1.2.6 and cElementTree 1.0.5.
+
The ``bench.py`` script runs a number of simple tests on the different
libraries, using different XML tree configurations: different tree sizes, with
or without attributes (-/A) and with or without ASCII or unicode text (-/S/U).
From scoder at codespeak.net Fri May 19 12:08:45 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Fri, 19 May 2006 12:08:45 +0200 (CEST)
Subject: [Lxml-checkins] r27457 - lxml/trunk/doc
Message-ID: <20060519100845.96B071006E@code0.codespeak.net>
Author: scoder
Date: Fri May 19 12:08:44 2006
New Revision: 27457
Modified:
lxml/trunk/doc/performance.txt
Log:
readability
Modified: lxml/trunk/doc/performance.txt
==============================================================================
--- lxml/trunk/doc/performance.txt (original)
+++ lxml/trunk/doc/performance.txt Fri May 19 12:08:44 2006
@@ -10,12 +10,13 @@
.. _ElementTree: http://effbot.org/zone/element-index.htm
.. _cElementTree: http://effbot.org/zone/celementtree.htm
-.. _`bench.py`: http://codespeak.net/svn/lxml/trunk/bench.py
The statements made here are backed by the benchmark script `bench.py`_ that
comes with the lxml source distribution. The numbers cited below compare lxml
1.0, ElementTree 1.2.6 and cElementTree 1.0.5.
+.. _`bench.py`: http://codespeak.net/svn/lxml/trunk/bench.py
+
The ``bench.py`` script runs a number of simple tests on the different
libraries, using different XML tree configurations: different tree sizes, with
or without attributes (-/A) and with or without ASCII or unicode text (-/S/U).
From scoder at codespeak.net Fri May 19 12:10:20 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Fri, 19 May 2006 12:10:20 +0200 (CEST)
Subject: [Lxml-checkins] r27458 - lxml/trunk/doc
Message-ID: <20060519101020.8BA591006E@code0.codespeak.net>
Author: scoder
Date: Fri May 19 12:10:19 2006
New Revision: 27458
Modified:
lxml/trunk/doc/performance.txt
Log:
readability
Modified: lxml/trunk/doc/performance.txt
==============================================================================
--- lxml/trunk/doc/performance.txt (original)
+++ lxml/trunk/doc/performance.txt Fri May 19 12:10:19 2006
@@ -12,8 +12,8 @@
.. _cElementTree: http://effbot.org/zone/celementtree.htm
The statements made here are backed by the benchmark script `bench.py`_ that
-comes with the lxml source distribution. The numbers cited below compare lxml
-1.0, ElementTree 1.2.6 and cElementTree 1.0.5.
+comes with the lxml source distribution. The numbers that are cited below
+compare lxml 1.0, ElementTree 1.2.6 and cElementTree 1.0.5.
.. _`bench.py`: http://codespeak.net/svn/lxml/trunk/bench.py
From scoder at codespeak.net Fri May 19 12:13:02 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Fri, 19 May 2006 12:13:02 +0200 (CEST)
Subject: [Lxml-checkins] r27459 - lxml/trunk/doc
Message-ID: <20060519101302.072AD1006E@code0.codespeak.net>
Author: scoder
Date: Fri May 19 12:13:02 2006
New Revision: 27459
Modified:
lxml/trunk/doc/performance.txt
Log:
clarify how the benchmarks work
Modified: lxml/trunk/doc/performance.txt
==============================================================================
--- lxml/trunk/doc/performance.txt (original)
+++ lxml/trunk/doc/performance.txt Fri May 19 12:13:02 2006
@@ -23,7 +23,8 @@
In the result extracts cited below, T1 refers to a 3-level tree with many
children at the third level, T2 is swapped around to have many children at the
root element, T3 is a deep tree with few children at each level and T4 is a
-small tree, slightly broader than deep.
+small tree, slightly broader than deep. Most benchmarks run in a loop over
+all children of the tree root.
Bad things first
From scoder at codespeak.net Fri May 19 12:17:25 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Fri, 19 May 2006 12:17:25 +0200 (CEST)
Subject: [Lxml-checkins] r27462 - lxml/trunk/doc
Message-ID: <20060519101725.6C6FC10071@code0.codespeak.net>
Author: scoder
Date: Fri May 19 12:17:24 2006
New Revision: 27462
Modified:
lxml/trunk/doc/performance.txt
Log:
clarify that cET and lxml are close in parser performance
Modified: lxml/trunk/doc/performance.txt
==============================================================================
--- lxml/trunk/doc/performance.txt (original)
+++ lxml/trunk/doc/performance.txt Fri May 19 12:17:24 2006
@@ -75,8 +75,7 @@
ET : write_utf8_parse_stringIO (UA T2) 1297.9678 msec/pass
So, lxml also wins this contest, but considering the previous numbers on
-serialization, parser performance is otherwise roughly comparable between cET
-and lxml.
+serialization, cET comes rather close in plain parser performance.
The ElementTree API
From scoder at codespeak.net Fri May 19 12:34:46 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Fri, 19 May 2006 12:34:46 +0200 (CEST)
Subject: [Lxml-checkins] r27463 - lxml/trunk/doc
Message-ID: <20060519103446.A264D10063@code0.codespeak.net>
Author: scoder
Date: Fri May 19 12:34:45 2006
New Revision: 27463
Modified:
lxml/trunk/doc/performance.txt
Log:
make clear why lxml is slower in tree construction
Modified: lxml/trunk/doc/performance.txt
==============================================================================
--- lxml/trunk/doc/performance.txt (original)
+++ lxml/trunk/doc/performance.txt Fri May 19 12:34:45 2006
@@ -82,33 +82,40 @@
-------------------
Since all three libraries implement the same API, their performance is easy to
-compare in this area. A major disadvantage for lxml is the different tree
-model that underlies libxml2. It allows lxml to provide parent pointers for
-elements, but also increases the overhead of tree restructuring. This can be
-seen from the tree setup times of the benchmark::
+compare in this area. A major disadvantage for lxml's performance is the
+different tree model that underlies libxml2. It allows lxml to provide parent
+pointers for elements, but also increases the overhead of tree building and
+restructuring. This can be seen from the tree setup times of the benchmark
+(given in seconds)::
- Setup times for trees in seconds:
lxe: -- S- U- -A SA UA
T1: 0.1658 0.1236 0.1241 0.1243 0.1261 0.1254
T2: 0.1281 0.1282 0.1299 0.1381 0.1389 0.1395
T3: 0.0366 0.0300 0.0290 0.0850 0.0851 0.0893
T4: 0.0010 0.0006 0.0006 0.0018 0.0018 0.0019
+
cET: -- S- U- -A SA UA
T1: 0.0417 0.0409 0.0403 0.0410 0.0410 0.0415
T2: 0.0413 0.0414 0.0413 0.0417 0.0411 0.0417
T3: 0.0097 0.0100 0.0099 0.0187 0.0142 0.0146
T4: 0.0001 0.0001 0.0001 0.0001 0.0001 0.0001
+
ET : -- S- U- -A SA UA
T1: 0.2189 0.2832 0.2210 0.2646 0.2905 0.2214
T2: 0.3022 0.2322 0.2868 0.3192 0.2290 0.3075
T3: 0.0519 0.0553 0.0527 0.0601 0.0572 0.0911
T4: 0.0009 0.0008 0.0008 0.0008 0.0009 0.0009
-While lxml is still faster than ET in most cases (30-60%), cET can be three to
-four times as fast as lxml here. So, if the main performance bottleneck of an
-application is creating large XML trees in memory through calls to Element and
-SubElement, cET is the best choice. Note, however, that the serialisation
-performance may even out this advantage.
+While lxml is still faster than ET in most cases (30-60%), cET can be up to
+three times faster than lxml here. One of the reasons is that lxml must
+additionally discard the created Python elements after their use, when they
+are no longer referenced. ET and cET represent the tree itself through these
+objects, which reduces their overhead in creating them.
+
+So, if the main performance bottleneck of an application is creating large XML
+trees in memory through calls to Element and SubElement, cET is the best
+choice. Note, however, that the serialisation performance may even out this
+advantage.
A critical action for lxml is moving elements between document contexts. It
requires lxml to do recursive adaptations throughout the moved tree structure.
From scoder at codespeak.net Fri May 19 12:36:58 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Fri, 19 May 2006 12:36:58 +0200 (CEST)
Subject: [Lxml-checkins] r27464 - lxml/trunk/doc
Message-ID: <20060519103658.D140C10063@code0.codespeak.net>
Author: scoder
Date: Fri May 19 12:36:57 2006
New Revision: 27464
Modified:
lxml/trunk/doc/performance.txt
Log:
fix outlier in results
Modified: lxml/trunk/doc/performance.txt
==============================================================================
--- lxml/trunk/doc/performance.txt (original)
+++ lxml/trunk/doc/performance.txt Fri May 19 12:36:57 2006
@@ -89,7 +89,7 @@
(given in seconds)::
lxe: -- S- U- -A SA UA
- T1: 0.1658 0.1236 0.1241 0.1243 0.1261 0.1254
+ T1: 0.1360 0.1236 0.1241 0.1243 0.1261 0.1254
T2: 0.1281 0.1282 0.1299 0.1381 0.1389 0.1395
T3: 0.0366 0.0300 0.0290 0.0850 0.0851 0.0893
T4: 0.0010 0.0006 0.0006 0.0018 0.0018 0.0019
From scoder at codespeak.net Fri May 19 13:36:31 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Fri, 19 May 2006 13:36:31 +0200 (CEST)
Subject: [Lxml-checkins] r27467 - lxml/trunk/src/lxml
Message-ID: <20060519113631.507BB1006E@code0.codespeak.net>
Author: scoder
Date: Fri May 19 13:36:29 2006
New Revision: 27467
Modified:
lxml/trunk/src/lxml/apihelpers.pxi
lxml/trunk/src/lxml/etree.pyx
Log:
refactoring of Element.get/set and Attrib.get/set etc. to use external helper functions, reduces code duplication
Modified: lxml/trunk/src/lxml/apihelpers.pxi
==============================================================================
--- lxml/trunk/src/lxml/apihelpers.pxi (original)
+++ lxml/trunk/src/lxml/apihelpers.pxi Fri May 19 13:36:29 2006
@@ -100,6 +100,36 @@
c_attrib_node.ns.href)
return funicode(value)
+cdef object _getAttributeValue(_NodeBase element, key, default):
+ cdef char* c_result
+ cdef char* c_tag
+ ns, tag = _getNsTag(key)
+ c_tag = _cstr(tag)
+ if ns is None:
+ c_result = tree.xmlGetNoNsProp(element._c_node, c_tag)
+ else:
+ c_result = tree.xmlGetNsProp(element._c_node, c_tag, _cstr(ns))
+ if c_result is NULL:
+ # XXX free namespace that is not in use..?
+ return default
+ result = funicode(c_result)
+ tree.xmlFree(c_result)
+ return result
+
+cdef void _setAttributeValue(_NodeBase element, key, value):
+ cdef xmlNs* c_ns
+ cdef char* c_value
+ cdef char* c_tag
+ ns, tag = _getNsTag(key)
+ c_tag = _cstr(tag)
+ value = _utf8(value)
+ c_value = _cstr(value)
+ if ns is None:
+ tree.xmlSetProp(element._c_node, c_tag, c_value)
+ else:
+ c_ns = element._doc._findOrBuildNodeNs(element._c_node, _cstr(ns))
+ tree.xmlSetNsProp(element._c_node, c_ns, c_tag, c_value)
+
cdef object __REPLACE_XML_ENCODING
__REPLACE_XML_ENCODING = re.compile(
r'^(\s*<\?\s*xml[^>]+)\s+encoding\s*=\s*"[^"]*"\s*', re.U).sub
Modified: lxml/trunk/src/lxml/etree.pyx
==============================================================================
--- lxml/trunk/src/lxml/etree.pyx (original)
+++ lxml/trunk/src/lxml/etree.pyx Fri May 19 13:36:29 2006
@@ -589,7 +589,7 @@
return new_doc.getroot()
def set(self, key, value):
- self.attrib[key] = value
+ _setAttributeValue(self, key, value)
def append(self, _Element element not None):
cdef xmlNode* c_next
@@ -839,22 +839,7 @@
raise ValueError, "list.index(x): x not in list"
def get(self, key, default=None):
- # XXX more redundancy, but might be slightly faster than
- # return self.attrib.get(key, default)
- cdef char* cresult
- cdef char* c_tag
- ns, tag = _getNsTag(key)
- c_tag = _cstr(tag)
- if ns is None:
- cresult = tree.xmlGetNoNsProp(self._c_node, c_tag)
- else:
- cresult = tree.xmlGetNsProp(self._c_node, c_tag, _cstr(ns))
- if cresult is NULL:
- result = default
- else:
- result = funicode(cresult)
- tree.xmlFree(cresult)
- return result
+ return _getAttributeValue(self, key, default)
def keys(self):
return self.attrib.keys()
@@ -1000,21 +985,9 @@
cdef class _Attrib(_NodeBase):
# MANIPULATORS
def __setitem__(self, key, value):
- cdef xmlNs* c_ns
- cdef char* c_value
- cdef char* c_tag
- ns, tag = _getNsTag(key)
- c_tag = _cstr(tag)
- value = _utf8(value)
- c_value = _cstr(value)
- if ns is None:
- tree.xmlSetProp(self._c_node, c_tag, c_value)
- else:
- c_ns = self._doc._findOrBuildNodeNs(self._c_node, _cstr(ns))
- tree.xmlSetNsProp(self._c_node, c_ns, c_tag, c_value)
+ _setAttributeValue(self, key, value)
def __delitem__(self, key):
- cdef xmlNs* c_ns
cdef xmlAttr* c_attr
cdef char* c_tag
ns, tag = _getNsTag(key)
@@ -1036,21 +1009,20 @@
return repr(result)
def __getitem__(self, key):
- cdef xmlNs* c_ns
- cdef char* cresult
- cdef char* c_tag
- ns, tag = _getNsTag(key)
- c_tag = _cstr(tag)
- if ns is None:
- cresult = tree.xmlGetNoNsProp(self._c_node, c_tag)
- else:
- cresult = tree.xmlGetNsProp(self._c_node, c_tag, _cstr(ns))
- if cresult is NULL:
- # XXX free namespace that is not in use..?
+ result = _getAttributeValue(self, key, None)
+ if result is None:
raise KeyError, key
- result = funicode(cresult)
- tree.xmlFree(cresult)
- return result
+ else:
+ return result
+
+ def __nonzero__(self):
+ cdef xmlNode* c_node
+ c_node = (self._c_node.properties)
+ while c_node is not NULL:
+ if c_node.type == tree.XML_ATTRIBUTE_NODE:
+ return True
+ c_node = c_node.next
+ return False
def __len__(self):
cdef Py_ssize_t c
@@ -1064,10 +1036,7 @@
return c
def get(self, key, default=None):
- try:
- return self.__getitem__(key)
- except KeyError:
- return default
+ return _getAttributeValue(self, key, default)
def keys(self):
result = []
@@ -1116,36 +1085,25 @@
return iter(self.items())
def has_key(self, key):
- cdef xmlNs* c_ns
- cdef char* result
- cdef char* c_tag
- ns, tag = _getNsTag(key)
- c_tag = _cstr(tag)
- if ns is None:
- result = tree.xmlGetNoNsProp(self._c_node, c_tag)
- else:
- result = tree.xmlGetNsProp(self._c_node, c_tag, _cstr(ns))
- if result is not NULL:
- tree.xmlFree(result)
+ if key in self:
return True
else:
return False
def __contains__(self, key):
- cdef xmlNs* c_ns
- cdef char* result
+ cdef char* c_result
cdef char* c_tag
ns, tag = _getNsTag(key)
c_tag = _cstr(tag)
if ns is None:
- result = tree.xmlGetNoNsProp(self._c_node, c_tag)
- else:
- result = tree.xmlGetNsProp(self._c_node, c_tag, _cstr(ns))
- if result is not NULL:
- tree.xmlFree(result)
- return True
+ c_result = tree.xmlGetNoNsProp(self._c_node, c_tag)
else:
+ c_result = tree.xmlGetNsProp(self._c_node, c_tag, _cstr(ns))
+ if c_result is NULL:
return False
+ else:
+ tree.xmlFree(c_result)
+ return True
cdef _Attrib _attribFactory(_Document doc, xmlNode* c_node):
cdef _Attrib result
From scoder at codespeak.net Fri May 19 14:01:20 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Fri, 19 May 2006 14:01:20 +0200 (CEST)
Subject: [Lxml-checkins] r27468 - lxml/trunk/src/lxml
Message-ID: <20060519120120.6F02310063@code0.codespeak.net>
Author: scoder
Date: Fri May 19 14:01:18 2006
New Revision: 27468
Modified:
lxml/trunk/src/lxml/etree.pyx
Log:
be more conservative in ElementDepthFirstIterator to prevent possible problems under tree modifications, some cleanup
Modified: lxml/trunk/src/lxml/etree.pyx
==============================================================================
--- lxml/trunk/src/lxml/etree.pyx (original)
+++ lxml/trunk/src/lxml/etree.pyx Fri May 19 14:01:18 2006
@@ -1163,12 +1163,12 @@
tree it traverses is modified during iteration.
"""
# we keep Python references here to control GC
- # keep next node to return and a stack of position state in the tree
+ # keep next node to return and a depth counter in the tree
+ cdef _NodeBase _next_node
+ cdef Py_ssize_t _depth
cdef object _pystrings
cdef char* _href
cdef char* _name
- cdef Py_ssize_t _depth
- cdef _NodeBase _next_node
def __init__(self, _NodeBase node not None, tag=None):
self._next_node = node
self._depth = 0
@@ -1226,7 +1226,7 @@
c_node = _findDepthFirstInFollowingSiblings(
c_parent, self._href, self._name)
- if c_node is NULL:
+ if c_node is NULL or not _isElement(c_parent):
self._next_node = None
return # all found, nothing left
# we are at a sibling, so set c_parent to our parent
From scoder at codespeak.net Fri May 19 14:21:15 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Fri, 19 May 2006 14:21:15 +0200 (CEST)
Subject: [Lxml-checkins] r27469 - lxml/trunk/src/lxml
Message-ID: <20060519122115.8102210063@code0.codespeak.net>
Author: scoder
Date: Fri May 19 14:21:14 2006
New Revision: 27469
Modified:
lxml/trunk/src/lxml/apihelpers.pxi
lxml/trunk/src/lxml/cstd.pxd
lxml/trunk/src/lxml/tree.pxd
Log:
some cleanup in _getNsTag, use cstd.strchr instead of tree.Strchr
Modified: lxml/trunk/src/lxml/apihelpers.pxi
==============================================================================
--- lxml/trunk/src/lxml/apihelpers.pxi (original)
+++ lxml/trunk/src/lxml/apihelpers.pxi Fri May 19 14:21:14 2006
@@ -371,23 +371,26 @@
Return None for NS uri if no namespace URI available.
"""
cdef char* c_tag
- cdef char* c_pos
- cdef int nslen
+ cdef char* c_ns_end
+ cdef Py_ssize_t taglen
+ cdef Py_ssize_t nslen
if isinstance(tag, QName):
tag = (tag).text
tag = _utf8(tag)
c_tag = _cstr(tag)
if c_tag[0] == c'{':
- c_pos = tree.xmlStrchr(c_tag+1, c'}')
- if c_pos is NULL:
+ c_tag = c_tag + 1
+ c_ns_end = cstd.strchr(c_tag, c'}')
+ if c_ns_end is NULL:
raise ValueError, "Invalid tag name"
- nslen = c_pos - c_tag - 1
- ns = python.PyString_FromStringAndSize(c_tag+1, nslen)
- tag = python.PyString_FromString(c_pos+1)
+ nslen = c_ns_end - c_tag
+ taglen = python.PyString_GET_SIZE(tag) - nslen - 2
+ ns = python.PyString_FromStringAndSize(c_tag, nslen)
+ tag = python.PyString_FromStringAndSize(c_ns_end+1, taglen)
else:
ns = None
return ns, tag
-
+
cdef object _namespacedName(xmlNode* c_node):
cdef char* href
cdef char* name
Modified: lxml/trunk/src/lxml/cstd.pxd
==============================================================================
--- lxml/trunk/src/lxml/cstd.pxd (original)
+++ lxml/trunk/src/lxml/cstd.pxd Fri May 19 14:21:14 2006
@@ -6,6 +6,7 @@
ctypedef int size_t
cdef int strlen(char* s)
cdef char* strstr(char* haystack, char* needle)
+ cdef char* strchr(char* haystack, int needle)
cdef int strcmp(char* s1, char* s2)
cdef int strncmp(char* s1, char* s2, size_t len)
cdef void* memcpy(void* dest, void* src, size_t len)
Modified: lxml/trunk/src/lxml/tree.pxd
==============================================================================
--- lxml/trunk/src/lxml/tree.pxd (original)
+++ lxml/trunk/src/lxml/tree.pxd Fri May 19 14:21:14 2006
@@ -236,7 +236,6 @@
cdef extern from "libxml/xmlstring.h":
cdef char* xmlStrdup(char* cur)
- cdef char* xmlStrchr(char* cur, char value)
cdef extern from "etree.h":
cdef int _isElement(xmlNode* node)
From scoder at codespeak.net Fri May 19 14:22:12 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Fri, 19 May 2006 14:22:12 +0200 (CEST)
Subject: [Lxml-checkins] r27470 - lxml/trunk
Message-ID: <20060519122212.ED7A710063@code0.codespeak.net>
Author: scoder
Date: Fri May 19 14:22:11 2006
New Revision: 27470
Modified:
lxml/trunk/bench.py
Log:
forgotten debug output in bench.py
Modified: lxml/trunk/bench.py
==============================================================================
--- lxml/trunk/bench.py (original)
+++ lxml/trunk/bench.py Fri May 19 14:22:11 2006
@@ -615,7 +615,6 @@
selected = []
for name in sys.argv[1:]:
selected.append(name)
- print selected
benchmarks = [ [ b for b in bs
if [ match for match in selected
if match in b[0] ] ]
From scoder at codespeak.net Fri May 19 15:09:18 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Fri, 19 May 2006 15:09:18 +0200 (CEST)
Subject: [Lxml-checkins] r27473 - lxml/trunk
Message-ID: <20060519130918.F0F5010061@code0.codespeak.net>
Author: scoder
Date: Fri May 19 15:09:17 2006
New Revision: 27473
Modified:
lxml/trunk/bench.py
Log:
more forgotten debug code in bench.py
Modified: lxml/trunk/bench.py
==============================================================================
--- lxml/trunk/bench.py (original)
+++ lxml/trunk/bench.py Fri May 19 15:09:17 2006
@@ -281,7 +281,6 @@
@with_text(text=True, utext=True)
def bench_tostring_utf8_unicode_XML(self, root):
xml = unicode(self.etree.tostring(root, 'UTF-8'), 'UTF-8')
- open("test%03d.txt" % len(root), 'w').write(xml.encode('UTF-8'))
self.etree.XML(xml)
@with_attributes(True)
From scoder at codespeak.net Fri May 19 15:19:54 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Fri, 19 May 2006 15:19:54 +0200 (CEST)
Subject: [Lxml-checkins] r27474 - in lxml/trunk/src/lxml: . tests
Message-ID: <20060519131954.433D710063@code0.codespeak.net>
Author: scoder
Date: Fri May 19 15:19:53 2006
New Revision: 27474
Modified:
lxml/trunk/src/lxml/etree.pyx
lxml/trunk/src/lxml/tests/test_elementtree.py
Log:
Element.__contains__ for quick check if an element has a certain child, some cleanup
Modified: lxml/trunk/src/lxml/etree.pyx
==============================================================================
--- lxml/trunk/src/lxml/etree.pyx (original)
+++ lxml/trunk/src/lxml/etree.pyx Fri May 19 15:19:53 2006
@@ -764,6 +764,13 @@
c_node = _findChildBackwards(self._c_node, 0)
return c_node != NULL
+ def __contains__(self, element):
+ cdef xmlNode* c_node
+ if not isinstance(element, _NodeBase):
+ return 0
+ c_node = (<_NodeBase>element)._c_node
+ return c_node is not NULL and c_node.parent is self._c_node
+
def __iter__(self):
return ElementChildIterator(self)
@@ -1020,9 +1027,9 @@
c_node = (self._c_node.properties)
while c_node is not NULL:
if c_node.type == tree.XML_ATTRIBUTE_NODE:
- return True
+ return 1
c_node = c_node.next
- return False
+ return 0
def __len__(self):
cdef Py_ssize_t c
@@ -1100,10 +1107,10 @@
else:
c_result = tree.xmlGetNsProp(self._c_node, c_tag, _cstr(ns))
if c_result is NULL:
- return False
+ return 0
else:
tree.xmlFree(c_result)
- return True
+ return 1
cdef _Attrib _attribFactory(_Document doc, xmlNode* c_node):
cdef _Attrib result
Modified: lxml/trunk/src/lxml/tests/test_elementtree.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_elementtree.py (original)
+++ lxml/trunk/src/lxml/tests/test_elementtree.py Fri May 19 15:19:53 2006
@@ -89,6 +89,24 @@
self.assertEquals('two', root[1].tag)
self.assertEquals('three', root[2].tag)
+ def test_element_contains(self):
+ Element = self.etree.Element
+ SubElement = self.etree.SubElement
+
+ root1 = Element('root')
+ SubElement(root1, 'one')
+ self.assert_(root1[0] in root1)
+
+ root2 = Element('root')
+ SubElement(root2, 'two')
+ SubElement(root2, 'three')
+ self.assert_(root2[0] in root2)
+ self.assert_(root2[1] in root2)
+
+ self.assertFalse(root1[0] in root2)
+ self.assertFalse(root2[0] in root1)
+ self.assertFalse(None in root2)
+
def test_element_indexing_with_text(self):
ElementTree = self.etree.ElementTree
From scoder at codespeak.net Fri May 19 15:37:59 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Fri, 19 May 2006 15:37:59 +0200 (CEST)
Subject: [Lxml-checkins] r27475 - lxml/trunk
Message-ID: <20060519133759.4AE8C10063@code0.codespeak.net>
Author: scoder
Date: Fri May 19 15:37:58 2006
New Revision: 27475
Modified:
lxml/trunk/bench.py
Log:
fix tag names in tree 4
Modified: lxml/trunk/bench.py
==============================================================================
--- lxml/trunk/bench.py (original)
+++ lxml/trunk/bench.py Fri May 19 15:37:58 2006
@@ -61,7 +61,7 @@
'cElementTree' : 'cET'
}
- SEARCH_TAG = "{cdefg}00001"
+ SEARCH_TAG = "{cdefg}a00001"
def __init__(self, etree):
self.etree = etree
@@ -183,17 +183,16 @@
def _setup_tree4(self, text, attributes):
"small tree with 26 2nd level and 2 3rd level children"
- atoz = self.atoz
SubElement = self.etree.SubElement
current_time = time.time
t = current_time()
root = self.etree.Element('{abc}rootnode')
children = [root]
- for ch1 in atoz:
+ for ch1 in self.atoz:
el = SubElement(root, "{bcd}"+ch1*5, attributes)
el.text = text
- SubElement(el, "{cdefg}00001", attributes)
- SubElement(el, "{cdefg}00002", attributes)
+ SubElement(el, "{cdefg}a00001", attributes)
+ SubElement(el, "{cdefg}a00002", attributes)
t = current_time() - t
return (root, t)
From scoder at codespeak.net Fri May 19 15:47:02 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Fri, 19 May 2006 15:47:02 +0200 (CEST)
Subject: [Lxml-checkins] r27476 - lxml/trunk/doc
Message-ID: <20060519134702.B252710063@code0.codespeak.net>
Author: scoder
Date: Fri May 19 15:47:01 2006
New Revision: 27476
Modified:
lxml/trunk/doc/performance.txt
Log:
update benchmark results in doc/performance.txt for bench.py changes
Modified: lxml/trunk/doc/performance.txt
==============================================================================
--- lxml/trunk/doc/performance.txt (original)
+++ lxml/trunk/doc/performance.txt Fri May 19 15:47:01 2006
@@ -172,21 +172,21 @@
algorithms can benefit from step-by-step traversal of the XML tree and
especially if few elements are of interest, lxml is a good choice::
- lxe: getiterator_all (-- T2 ) 32.3100 msec/pass
- cET: getiterator_all (-- T2 ) 37.2489 msec/pass
- ET : getiterator_all (-- T2 ) 46.2996 msec/pass
-
- lxe: getiterator_islice (-- T2 ) 3.3567 msec/pass
- cET: getiterator_islice (-- T2 ) 0.3289 msec/pass
- ET : getiterator_islice (-- T2 ) 43.9938 msec/pass
-
- lxe: getiterator_tag (-- T2 ) 4.7438 msec/pass
- cET: getiterator_tag (-- T2 ) 31.8628 msec/pass
- ET : getiterator_tag (-- T2 ) 36.4583 msec/pass
-
- lxe: getiterator_tag_all (-- T2 ) 4.6267 msec/pass
- cET: getiterator_tag_all (-- T2 ) 32.1669 msec/pass
- ET : getiterator_tag_all (-- T2 ) 36.3365 msec/pass
+ lxe: getiterator_all (-- T2 ) 31.2719 msec/pass
+ cET: getiterator_all (-- T2 ) 36.3687 msec/pass
+ ET : getiterator_all (-- T2 ) 46.2846 msec/pass
+
+ lxe: getiterator_islice (-- T2 ) 2.8503 msec/pass
+ cET: getiterator_islice (-- T2 ) 0.3299 msec/pass
+ ET : getiterator_islice (-- T2 ) 44.5898 msec/pass
+
+ lxe: getiterator_tag (-- T2 ) 3.0983 msec/pass
+ cET: getiterator_tag (-- T2 ) 11.2861 msec/pass
+ ET : getiterator_tag (-- T2 ) 37.5661 msec/pass
+
+ lxe: getiterator_tag_all (-- T2 ) 4.9760 msec/pass
+ cET: getiterator_tag_all (-- T2 ) 33.2602 msec/pass
+ ET : getiterator_tag_all (-- T2 ) 37.6200 msec/pass
This similarly shows in ``Element.findall()``::
From scoder at codespeak.net Sat May 20 13:06:57 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Sat, 20 May 2006 13:06:57 +0200 (CEST)
Subject: [Lxml-checkins] r27500 - lxml/trunk/src/lxml/tests
Message-ID: <20060520110657.041C910072@code0.codespeak.net>
Author: scoder
Date: Sat May 20 13:06:56 2006
New Revision: 27500
Modified:
lxml/trunk/src/lxml/tests/test_htmlparser.py
lxml/trunk/src/lxml/tests/test_unicode.py
Log:
fix encoding used in test cases
Modified: lxml/trunk/src/lxml/tests/test_htmlparser.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_htmlparser.py (original)
+++ lxml/trunk/src/lxml/tests/test_htmlparser.py Sat May 20 13:06:56 2006
@@ -8,7 +8,7 @@
import tempfile
from common_imports import StringIO, etree, fileInTestDir
-from common_imports import SillyFileLike, HelperTestCase, unentitify
+from common_imports import SillyFileLike, HelperTestCase
class HtmlParserTestCaseBase(HelperTestCase):
"""HTML parser test cases
@@ -29,7 +29,7 @@
def test_module_HTML_unicode(self):
element = self.etree.HTML(self.uhtml_str)
- self.assertEqual(unentitify(self.etree.tostring(element)),
+ self.assertEqual(unicode(self.etree.tostring(element, 'UTF8'), 'UTF8'),
self.uhtml_str)
def test_module_parse_html_error(self):
@@ -67,15 +67,15 @@
parser = self.etree.HTMLParser()
f = SillyFileLike(self.html_str)
tree = self.etree.parse(f, parser)
- html = self.etree.tostring(tree.getroot())
- self.assertEqual(unentitify(html), self.html_str)
+ html = self.etree.tostring(tree.getroot(), 'UTF-8')
+ self.assertEqual(html, self.html_str)
def test_module_parse_html_filelike_unicode(self):
parser = self.etree.HTMLParser()
f = SillyFileLike(self.uhtml_str)
tree = self.etree.parse(f, parser)
- html = self.etree.tostring(tree.getroot())
- self.assertEqual(unentitify(html), self.uhtml_str)
+ html = self.etree.tostring(tree.getroot(), 'UTF-8')
+ self.assertEqual(unicode(html, 'UTF-8'), self.uhtml_str)
def test_html_file_error(self):
parser = self.etree.HTMLParser()
Modified: lxml/trunk/src/lxml/tests/test_unicode.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_unicode.py (original)
+++ lxml/trunk/src/lxml/tests/test_unicode.py Sat May 20 13:06:56 2006
@@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
import unittest, doctest
-from common_imports import StringIO, etree, SillyFileLike, unentitify
+from common_imports import StringIO, etree, SillyFileLike
ascii_uni = u'a'
@@ -45,7 +45,8 @@
# parse unicode from unamed file object (not support by ElementTree)
f = SillyFileLike(uxml)
root = etree.parse(f).getroot()
- self.assertEquals(unentitify(etree.tostring(root)), uxml)
+ self.assertEquals(unicode(etree.tostring(root, 'UTF-8'), 'UTF-8'),
+ uxml)
def test_suite():
suite = unittest.TestSuite()
From scoder at codespeak.net Sat May 20 13:08:24 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Sat, 20 May 2006 13:08:24 +0200 (CEST)
Subject: [Lxml-checkins] r27501 - lxml/trunk/src/lxml/tests
Message-ID: <20060520110824.945811006E@code0.codespeak.net>
Author: scoder
Date: Sat May 20 13:08:23 2006
New Revision: 27501
Modified:
lxml/trunk/src/lxml/tests/test_elementtree.py
Log:
removed broken test case that used an invalid encoding
Modified: lxml/trunk/src/lxml/tests/test_elementtree.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_elementtree.py (original)
+++ lxml/trunk/src/lxml/tests/test_elementtree.py Sat May 20 13:08:23 2006
@@ -1675,19 +1675,9 @@
a.text = u'Søk på nettet'
self.assertXML(
u'<a>Søk på nettet</a>'.encode('UTF-8'),
- a)
-
- def test_encoding2(self):
- ElementTree = self.etree.ElementTree
- Element = self.etree.Element
-
- a = Element('a')
- a.text = u'Søk på nettet'
- self.assertXML(
- u'<a>Søk på nettet</a>'.encode('UTF-8'),
a, 'UTF-8')
- def test_encoding3(self):
+ def test_encoding2(self):
ElementTree = self.etree.ElementTree
Element = self.etree.Element
From scoder at codespeak.net Sat May 20 13:21:11 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Sat, 20 May 2006 13:21:11 +0200 (CEST)
Subject: [Lxml-checkins] r27502 - in lxml/trunk: . src/lxml src/lxml/tests
Message-ID: <20060520112111.53D3210070@code0.codespeak.net>
Author: scoder
Date: Sat May 20 13:21:08 2006
New Revision: 27502
Modified:
lxml/trunk/CHANGES.txt
lxml/trunk/src/lxml/etree.pyx
lxml/trunk/src/lxml/tests/test_elementtree.py
lxml/trunk/src/lxml/tree.pxd
lxml/trunk/src/lxml/xmlwriter.pxi
Log:
some cleanup in encoding setup for tostring and write, raise LookupError if requested encoding cannot be found
Modified: lxml/trunk/CHANGES.txt
==============================================================================
--- lxml/trunk/CHANGES.txt (original)
+++ lxml/trunk/CHANGES.txt Sat May 20 13:21:08 2006
@@ -12,6 +12,8 @@
Bugs fixed
----------
+* Serialization functions now raise LookupError for unknown encodings
+
* Memory deallocation crash resulting from deep copying elements
Modified: lxml/trunk/src/lxml/etree.pyx
==============================================================================
--- lxml/trunk/src/lxml/etree.pyx (original)
+++ lxml/trunk/src/lxml/etree.pyx Sat May 20 13:21:08 2006
@@ -334,8 +334,12 @@
cdef _Document _doc
cdef _NodeBase _context_node
- # we have to take care here: the document may not have a root node!
cdef _assertHasRoot(self):
+ """We have to take care here: the document may not have a root node!
+ This can happen if ElementTree() is called without any argument and
+ the caller 'forgets' to call parse() afterwards, so this is a bug in
+ the caller program.
+ """
assert self._context_node is not None, \
"ElementTree not initialized, missing root"
@@ -357,15 +361,20 @@
def __get__(self):
return DocInfo(self._doc)
- def write(self, file, encoding='us-ascii', pretty_print=False):
+ def write(self, file, encoding=None, pretty_print=False):
+ """Write the tree to a file or file-like object.
+
+ Defaults to ASCII encoding.
+ """
self._assertHasRoot()
- if encoding in ('utf8', 'UTF8', 'utf-8'):
- encoding = 'UTF-8'
- if encoding == 'UTF-8' or encoding == 'us-ascii':
- # XXX this is purely for ElementTree compatibility..
+ # suppress decl. in default case (purely for ElementTree compatibility)
+ if encoding is None:
+ encoding = 'ASCII'
write_declaration = 0
else:
- write_declaration = 1
+ encoding = encoding.upper()
+ write_declaration = encoding not in \
+ ('US-ASCII', 'ASCII', 'UTF8', 'UTF-8')
_tofilelike(file, self._context_node, encoding,
write_declaration, bool(pretty_print))
@@ -521,7 +530,7 @@
# MANIPULATORS
- def __setitem__(self, Py_ssize_t index, _NodeBase element):
+ def __setitem__(self, Py_ssize_t index, _NodeBase element not None):
cdef xmlNode* c_node
cdef xmlNode* c_next
c_node = _findChild(self._c_node, index)
@@ -1370,16 +1379,24 @@
def dump(_NodeBase elem not None):
_dumpToFile(sys.stdout, elem._c_node)
-def tostring(element_or_tree, encoding='us-ascii',
+def tostring(element_or_tree, encoding=None,
xml_declaration=None, pretty_print=False):
- "Serialize an element to an encoded string representation of its XML tree."
+ """Serialize an element to an encoded string representation of its XML
+ tree.
+
+ Defaults to ASCII encoding without XML declaration.
+ """
cdef int write_declaration
cdef int c_pretty_print
- encoding = str(encoding)
+ if encoding is None:
+ encoding = 'ASCII'
+ else:
+ encoding = encoding.upper()
c_pretty_print = bool(pretty_print)
if xml_declaration is None:
# by default, write an XML declaration only for non-standard encodings
- write_declaration = (encoding != 'us-ascii')
+ write_declaration = encoding not in \
+ ('ASCII', 'UTF-8', 'UTF8', 'US-ASCII')
else:
write_declaration = bool(xml_declaration)
@@ -1397,8 +1414,9 @@
tree.
Note that the result does not carry an XML encoding declaration and is
- therefore not necessarily suited for serialization without further
- treatment."""
+ therefore not necessarily suited for serialization to byte streams without
+ further treatment.
+ """
cdef int c_pretty_print
c_pretty_print = bool(pretty_print)
if isinstance(element_or_tree, _NodeBase):
Modified: lxml/trunk/src/lxml/tests/test_elementtree.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_elementtree.py (original)
+++ lxml/trunk/src/lxml/tests/test_elementtree.py Sat May 20 13:21:08 2006
@@ -1731,6 +1731,14 @@
a.text = u'Søk på nettet'
self.assert_(tostring(a, 'UTF-8') in [xml, prologue + xml])
+ def test_encoding_tostring_unknown(self):
+ Element = self.etree.Element
+ tostring = self.etree.tostring
+
+ a = Element('a')
+ a.text = u'Søk på nettet'
+ self.assertRaises(LookupError, tostring, a, 'Invalid Encoding')
+
def test_encoding_tostring_sub(self):
Element = self.etree.Element
SubElement = self.etree.SubElement
Modified: lxml/trunk/src/lxml/tree.pxd
==============================================================================
--- lxml/trunk/src/lxml/tree.pxd (original)
+++ lxml/trunk/src/lxml/tree.pxd Sat May 20 13:21:08 2006
@@ -38,7 +38,7 @@
cdef xmlCharEncodingHandler* xmlFindCharEncodingHandler(char* name)
cdef xmlCharEncodingHandler* xmlGetCharEncodingHandler(int enc)
cdef int xmlDetectCharEncoding(char* text, int len)
- cdef char* xmlGetCharEncodingName(int enc)
+ cdef char* xmlGetCharEncodingName(xmlCharEncoding enc)
cdef extern from "libxml/hash.h":
ctypedef struct xmlHashTable
Modified: lxml/trunk/src/lxml/xmlwriter.pxi
==============================================================================
--- lxml/trunk/src/lxml/xmlwriter.pxi (original)
+++ lxml/trunk/src/lxml/xmlwriter.pxi Sat May 20 13:21:08 2006
@@ -12,12 +12,16 @@
cdef char* c_version
if element is None:
return None
- if encoding in ('utf8', 'UTF8', 'utf-8'):
- encoding = 'UTF-8'
- c_enc = encoding
+ if encoding is None:
+ c_enc = NULL
+ else:
+ c_enc = encoding
# it is necessary to *and* find the encoding handler *and* use
# encoding during output
enchandler = tree.xmlFindCharEncodingHandler(c_enc)
+ if enchandler is NULL:
+ raise LookupError, python.PyString_FromFormat(
+ "unknown encoding: '%s'", c_enc)
c_buffer = tree.xmlAllocOutputBuffer(enchandler)
if c_buffer is NULL:
raise LxmlError, "Failed to create output buffer"
@@ -146,8 +150,11 @@
c_enc = NULL
else:
c_enc = encoding
-
enchandler = tree.xmlFindCharEncodingHandler(c_enc)
+ if enchandler is NULL:
+ raise LookupError, python.PyString_FromFormat(
+ "unknown encoding: '%s'", c_enc)
+
if python.PyString_Check(f) or python.PyUnicode_Check(f):
filename = _utf8(f)
c_buffer = tree.xmlOutputBufferCreateFilename(
From scoder at codespeak.net Sat May 20 13:27:33 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Sat, 20 May 2006 13:27:33 +0200 (CEST)
Subject: [Lxml-checkins] r27503 - lxml/trunk
Message-ID: <20060520112733.78B0910070@code0.codespeak.net>
Author: scoder
Date: Sat May 20 13:27:32 2006
New Revision: 27503
Modified:
lxml/trunk/CHANGES.txt
Log:
CHANGES.txt: make 'deep copy maintains doc info' feature a fixed bug
Modified: lxml/trunk/CHANGES.txt
==============================================================================
--- lxml/trunk/CHANGES.txt (original)
+++ lxml/trunk/CHANGES.txt Sat May 20 13:27:32 2006
@@ -7,11 +7,11 @@
Features added
--------------
-* Deep copying Elements and ElementTrees maintains the document information
-
Bugs fixed
----------
+* Deep copying Elements and ElementTrees maintains the document information
+
* Serialization functions now raise LookupError for unknown encodings
* Memory deallocation crash resulting from deep copying elements
From scoder at codespeak.net Sat May 20 19:01:14 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Sat, 20 May 2006 19:01:14 +0200 (CEST)
Subject: [Lxml-checkins] r27513 - in lxml/trunk: . doc src/lxml
src/lxml/tests
Message-ID: <20060520170114.E0CFB10070@code0.codespeak.net>
Author: scoder
Date: Sat May 20 19:01:11 2006
New Revision: 27513
Modified:
lxml/trunk/CHANGES.txt
lxml/trunk/doc/compatibility.txt
lxml/trunk/src/lxml/etree.pyx
lxml/trunk/src/lxml/tests/test_elementtree.py
lxml/trunk/src/lxml/tests/test_etree.py
lxml/trunk/src/lxml/tests/test_unicode.py
lxml/trunk/src/lxml/tree.pxd
Log:
implemented setting text of comments, make whitespace handling around comment texts consistent in lxml (not ET compatible on serialization!)
Modified: lxml/trunk/CHANGES.txt
==============================================================================
--- lxml/trunk/CHANGES.txt (original)
+++ lxml/trunk/CHANGES.txt Sat May 20 19:01:11 2006
@@ -7,6 +7,8 @@
Features added
--------------
+* Comment texts can now be changed through the API
+
Bugs fixed
----------
Modified: lxml/trunk/doc/compatibility.txt
==============================================================================
--- lxml/trunk/doc/compatibility.txt (original)
+++ lxml/trunk/doc/compatibility.txt Sat May 20 19:01:11 2006
@@ -81,18 +81,22 @@
will be hard to solve. It won't affect some applications, but if you want
to port code you must unfortunately make sure that it doesn't.
-* ElementTree has a bug when serializing an empty Comment (no text argument
- given) to XML, etree serializes this successfully.
-
* When trying to set a subelement using __setitem__ that is in fact not an
Element but some other object, etree raises a TypeError, and ElementTree
raises an AssertionError. This also applies to some other places of the
API. In general, etree tries to avoid AssertionErrors in favour of being
more specific about the reason for the exception.
+* ElementTree has a bug when serializing an empty Comment (no text argument
+ given) to XML, etree serializes this successfully.
+
* ElementTree ignores comments when parsing XML, while etree will read them in
and treat them as Comment elements.
+* ElementTree adds whitespace around comments on serialization, lxml does
+ not. This means that a comment text "text" that ElementTree serializes as
+ "<!-- text -->" will become "<!--text-->" in lxml.
+
* Because etree is built on top of libxml2, which is namespace prefix aware,
etree preserves namespaces declarations and prefixes while ElementTree tends
to come up with its own prefixes (ns0, ns1, etc). When no namespace prefix
Modified: lxml/trunk/src/lxml/etree.pyx
==============================================================================
--- lxml/trunk/src/lxml/etree.pyx (original)
+++ lxml/trunk/src/lxml/etree.pyx Sat May 20 19:01:11 2006
@@ -960,11 +960,30 @@
property text:
def __get__(self):
- return funicode(self._c_node.content)
+ if self._c_node.content is NULL:
+ return ''
+ else:
+ return funicode(self._c_node.content)
def __set__(self, value):
- pass
-
+ cdef tree.xmlDict* c_dict
+ cdef char* c_text
+ if value is None:
+ value = ''
+ else:
+ value = _utf8(value)
+ c_text = self._c_node.content
+ if c_text is not NULL:
+ if self._c_node.doc is not NULL:
+ c_dict = self._c_node.doc.dict
+ else:
+ c_dict = NULL
+ # this code is copied from libxml2's DICT_FREE
+ if c_dict is NULL or \
+ tree.xmlDictOwns(c_dict, c_text) == 0:
+ tree.xmlFree(c_text)
+ self._c_node.content = tree.xmlStrdup(_cstr(value))
+
# ACCESSORS
def __repr__(self):
return "<!--%s-->" % self.text
@@ -1307,12 +1326,12 @@
cdef xmlNode* c_node
cdef xmlDoc* c_doc
if text is None:
- text = ' '
+ text = ''
else:
- text = ' %s ' % _utf8(text)
+ text = _utf8(text)
c_doc = _newDoc()
doc = _documentFactory(c_doc, None)
- c_node = _createComment(c_doc, text)
+ c_node = _createComment(c_doc, _cstr(text))
tree.xmlAddChild(c_doc, c_node)
return _commentFactory(doc, c_node)
Modified: lxml/trunk/src/lxml/tests/test_elementtree.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_elementtree.py (original)
+++ lxml/trunk/src/lxml/tests/test_elementtree.py Sat May 20 19:01:11 2006
@@ -706,9 +706,19 @@
a = Element('a')
a.append(Comment('foo'))
- self.assertXML(
- ' ',
- a)
+ self.assertEqual(a[0].text, 'foo')
+
+ def test_comment_text(self):
+ Element = self.etree.Element
+ SubElement = self.etree.SubElement
+ Comment = self.etree.Comment
+
+ a = Element('a')
+ a.append(Comment('foo'))
+ self.assertEqual(a[0].text, 'foo')
+
+ a[0].text = "TEST"
+ self.assertEqual(a[0].text, 'TEST')
def test_comment_whitespace(self):
Element = self.etree.Element
@@ -717,9 +727,7 @@
a = Element('a')
a.append(Comment(' foo '))
- self.assertXML(
- ' ',
- a)
+ self.assertEqual(a[0].text, ' foo ')
def test_comment_nonsense(self):
Comment = self.etree.Comment
Modified: lxml/trunk/src/lxml/tests/test_etree.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_etree.py (original)
+++ lxml/trunk/src/lxml/tests/test_etree.py Sat May 20 19:01:11 2006
@@ -144,10 +144,26 @@
a = Element('a')
a.append(Comment())
self.assertEquals(
- '<a><!-- --></a>',
+ '<a><!----></a>',
self._writeElement(a))
- # ignores Comment in ElementTree
+ # ElementTree ignores comments
+ def test_comment_parse_empty(self):
+ ElementTree = self.etree.ElementTree
+ tostring = self.etree.tostring
+
+ xml = '<a><b/><!----><c/></a>'
+ f = StringIO(xml)
+ doc = ElementTree(file=f)
+ a = doc.getroot()
+ self.assertEquals(
+ '',
+ a[1].text)
+ self.assertEquals(
+ xml,
+ tostring(a))
+
+ # ElementTree ignores comments
def test_comment_no_proxy_yet(self):
ElementTree = self.etree.ElementTree
@@ -158,6 +174,35 @@
' hoi ',
a[1].text)
+ # ElementTree adds whitespace around comments
+ def test_comment_text(self):
+ Element = self.etree.Element
+ Comment = self.etree.Comment
+ tostring = self.etree.tostring
+
+ a = Element('a')
+ a.append(Comment('foo'))
+ self.assertEquals(
+ '<a><!--foo--></a>',
+ tostring(a))
+
+ a[0].text = "TEST"
+ self.assertEquals(
+ '<a><!--TEST--></a>',
+ tostring(a))
+
+ # ElementTree adds whitespace around comments
+ def test_comment_whitespace(self):
+ Element = self.etree.Element
+ Comment = self.etree.Comment
+ tostring = self.etree.tostring
+
+ a = Element('a')
+ a.append(Comment(' foo '))
+ self.assertEquals(
+ '<a><!-- foo --></a>',
+ tostring(a))
+
# test weird dictionary interaction leading to segfault previously
def test_weird_dict_interaction(self):
root = self.etree.Element('root')
Modified: lxml/trunk/src/lxml/tests/test_unicode.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_unicode.py (original)
+++ lxml/trunk/src/lxml/tests/test_unicode.py Sat May 20 19:01:11 2006
@@ -35,7 +35,7 @@
def test_unicode_comment(self):
el = etree.Comment(uni)
- self.assertEquals(' %s ' % uni, el.text)
+ self.assertEquals(uni, el.text)
def test_unicode_parse_stringio(self):
el = etree.parse(StringIO(u'%s
' % uni)).getroot()
Modified: lxml/trunk/src/lxml/tree.pxd
==============================================================================
--- lxml/trunk/src/lxml/tree.pxd (original)
+++ lxml/trunk/src/lxml/tree.pxd Sat May 20 19:01:11 2006
@@ -51,6 +51,7 @@
# for some reason need to define this in this section;
# libxml/dict.h appears to be broken to include in C
ctypedef struct xmlDict
+ cdef int xmlDictOwns(xmlDict* dict, char* name)
ctypedef struct xmlDoc
ctypedef struct xmlAttr
From scoder at codespeak.net Sun May 21 19:47:13 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Sun, 21 May 2006 19:47:13 +0200 (CEST)
Subject: [Lxml-checkins] r27542 - in lxml/trunk: . src/lxml
Message-ID: <20060521174713.7865910075@code0.codespeak.net>
Author: scoder
Date: Sun May 21 19:47:05 2006
New Revision: 27542
Modified:
lxml/trunk/CHANGES.txt
lxml/trunk/src/lxml/etree.pyx
lxml/trunk/src/lxml/tree.pxd
lxml/trunk/src/lxml/xmlwriter.pxi
Log:
fix memory leak when using iconv converters, support pretty_print in dump() defaulting to True (as it is for debug anyway), some cleanup
Modified: lxml/trunk/CHANGES.txt
==============================================================================
--- lxml/trunk/CHANGES.txt (original)
+++ lxml/trunk/CHANGES.txt Sun May 21 19:47:05 2006
@@ -12,6 +12,8 @@
Bugs fixed
----------
+* Memory leak when using iconv encoders in tostring/write
+
* Deep copying Elements and ElementTrees maintains the document information
* Serialization functions now raise LookupError for unknown encodings
Modified: lxml/trunk/src/lxml/etree.pyx
==============================================================================
--- lxml/trunk/src/lxml/etree.pyx (original)
+++ lxml/trunk/src/lxml/etree.pyx Sun May 21 19:47:05 2006
@@ -1395,8 +1395,8 @@
def iselement(element):
return isinstance(element, _Element)
-def dump(_NodeBase elem not None):
- _dumpToFile(sys.stdout, elem._c_node)
+def dump(_NodeBase elem not None, pretty_print=True):
+ _dumpToFile(sys.stdout, elem._c_node, bool(pretty_print))
def tostring(element_or_tree, encoding=None,
xml_declaration=None, pretty_print=False):
Modified: lxml/trunk/src/lxml/tree.pxd
==============================================================================
--- lxml/trunk/src/lxml/tree.pxd (original)
+++ lxml/trunk/src/lxml/tree.pxd Sun May 21 19:47:05 2006
@@ -37,6 +37,7 @@
ctypedef struct xmlCharEncodingHandler
cdef xmlCharEncodingHandler* xmlFindCharEncodingHandler(char* name)
cdef xmlCharEncodingHandler* xmlGetCharEncodingHandler(int enc)
+ cdef int xmlCharEncCloseFunc(xmlCharEncodingHandler* handler)
cdef int xmlDetectCharEncoding(char* text, int len)
cdef char* xmlGetCharEncodingName(xmlCharEncoding enc)
Modified: lxml/trunk/src/lxml/xmlwriter.pxi
==============================================================================
--- lxml/trunk/src/lxml/xmlwriter.pxi (original)
+++ lxml/trunk/src/lxml/xmlwriter.pxi Sun May 21 19:47:05 2006
@@ -24,6 +24,7 @@
"unknown encoding: '%s'", c_enc)
c_buffer = tree.xmlAllocOutputBuffer(enchandler)
if c_buffer is NULL:
+ tree.xmlCharEncCloseFunc(enchandler)
raise LxmlError, "Failed to create output buffer"
try:
@@ -39,6 +40,7 @@
tree.xmlBufferLength(c_result_buffer))
finally:
tree.xmlOutputBufferClose(c_buffer)
+ tree.xmlCharEncCloseFunc(enchandler)
return result
cdef _tounicode(_NodeBase element, int pretty_print):
@@ -86,7 +88,6 @@
tree.xmlOutputBufferWriteString(c_buffer, encoding)
tree.xmlOutputBufferWriteString(c_buffer, "'?>\n")
-
cdef void _writeTail(tree.xmlOutputBuffer* c_buffer, xmlNode* c_node,
char* encoding, int pretty_print):
"Write the element tail."
@@ -98,7 +99,7 @@
# output to file-like objects
-cdef class _FileWriter:
+cdef class _FilelikeWriter:
cdef object _filelike
cdef _ExceptionContext _exc_context
def __init__(self, filelike, exc_context=None):
@@ -135,14 +136,14 @@
return 0
cdef int _writeFilelikeWriter(void* ctxt, char* c_buffer, int len):
- return (<_FileWriter>ctxt).write(c_buffer, len)
+ return (<_FilelikeWriter>ctxt).write(c_buffer, len)
cdef int _closeFilelikeWriter(void* ctxt):
- return (<_FileWriter>ctxt).close()
+ return (<_FilelikeWriter>ctxt).close()
cdef _tofilelike(f, _NodeBase element, encoding,
int write_xml_declaration, int pretty_print):
- cdef _FileWriter writer
+ cdef _FilelikeWriter writer
cdef tree.xmlOutputBuffer* c_buffer
cdef tree.xmlCharEncodingHandler* enchandler
cdef char* c_enc
@@ -160,26 +161,28 @@
c_buffer = tree.xmlOutputBufferCreateFilename(
_cstr(filename), enchandler, 0)
elif hasattr(f, 'write'):
- writer = _FileWriter(f)
+ writer = _FilelikeWriter(f)
c_buffer = writer._createOutputBuffer(enchandler)
else:
+ tree.xmlCharEncCloseFunc(enchandler)
raise TypeError, "File or filename expected, got '%s'" % type(f)
_writeNodeToBuffer(c_buffer, element._c_node, c_enc,
write_xml_declaration, pretty_print)
tree.xmlOutputBufferClose(c_buffer)
+ tree.xmlCharEncCloseFunc(enchandler)
if writer is not None:
writer._exc_context._raise_if_stored()
# dump node to file (mainly for debug)
-cdef _dumpToFile(f, xmlNode* c_node):
+cdef _dumpToFile(f, xmlNode* c_node, int pretty_print):
cdef tree.xmlOutputBuffer* c_buffer
if not python.PyFile_Check(f):
raise ValueError, "Not a file"
c_buffer = tree.xmlOutputBufferCreateFile(python.PyFile_AsFile(f), NULL)
- tree.xmlNodeDumpOutput(c_buffer, c_node.doc, c_node, 0, 0, NULL)
+ tree.xmlNodeDumpOutput(c_buffer, c_node.doc, c_node, 0, pretty_print, NULL)
_writeTail(c_buffer, c_node, NULL, 0)
tree.xmlOutputBufferWriteString(c_buffer, '\n')
tree.xmlOutputBufferFlush(c_buffer)
From scoder at codespeak.net Sun May 21 20:18:17 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Sun, 21 May 2006 20:18:17 +0200 (CEST)
Subject: [Lxml-checkins] r27543 - lxml/trunk/src/lxml
Message-ID: <20060521181817.1314F10076@code0.codespeak.net>
Author: scoder
Date: Sun May 21 20:18:16 2006
New Revision: 27543
Added:
lxml/trunk/src/lxml/serializer.pxi
- copied unchanged from r27542, lxml/trunk/src/lxml/xmlwriter.pxi
Removed:
lxml/trunk/src/lxml/xmlwriter.pxi
Modified:
lxml/trunk/src/lxml/etree.pyx
Log:
renamed xmlwriter.pxi to serializer.pxi as xmlwriter is misleading in libxml2 context
Modified: lxml/trunk/src/lxml/etree.pyx
==============================================================================
--- lxml/trunk/src/lxml/etree.pyx (original)
+++ lxml/trunk/src/lxml/etree.pyx Sun May 21 20:18:16 2006
@@ -1462,7 +1462,7 @@
include "nsclasses.pxi" # Namespace implementation and registry
include "docloader.pxi" # Support for custom document loaders
include "parser.pxi" # XML Parser
-include "xmlwriter.pxi" # XML output functions
+include "serializer.pxi" # XML output functions
include "xmlid.pxi" # XMLID and IDDict
include "extensions.pxi" # XPath/XSLT extension functions
include "xpath.pxi" # XPath evaluation
Deleted: /lxml/trunk/src/lxml/xmlwriter.pxi
==============================================================================
--- /lxml/trunk/src/lxml/xmlwriter.pxi Sun May 21 20:18:16 2006
+++ (empty file)
@@ -1,188 +0,0 @@
-# XML serialization and output functions
-
-tree.xmlKeepBlanksDefault(0)
-
-cdef _tostring(_NodeBase element, encoding,
- int write_xml_declaration, int pretty_print):
- "Serialize an element to an encoded string representation of its XML tree."
- cdef tree.xmlOutputBuffer* c_buffer
- cdef tree.xmlBuffer* c_result_buffer
- cdef tree.xmlCharEncodingHandler* enchandler
- cdef char* c_enc
- cdef char* c_version
- if element is None:
- return None
- if encoding is None:
- c_enc = NULL
- else:
- c_enc = encoding
- # it is necessary to *and* find the encoding handler *and* use
- # encoding during output
- enchandler = tree.xmlFindCharEncodingHandler(c_enc)
- if enchandler is NULL:
- raise LookupError, python.PyString_FromFormat(
- "unknown encoding: '%s'", c_enc)
- c_buffer = tree.xmlAllocOutputBuffer(enchandler)
- if c_buffer is NULL:
- tree.xmlCharEncCloseFunc(enchandler)
- raise LxmlError, "Failed to create output buffer"
-
- try:
- _writeNodeToBuffer(c_buffer, element._c_node, c_enc,
- write_xml_declaration, pretty_print)
- tree.xmlOutputBufferFlush(c_buffer)
- if c_buffer.conv is not NULL:
- c_result_buffer = c_buffer.conv
- else:
- c_result_buffer = c_buffer.buffer
- result = python.PyString_FromStringAndSize(
- tree.xmlBufferContent(c_result_buffer),
- tree.xmlBufferLength(c_result_buffer))
- finally:
- tree.xmlOutputBufferClose(c_buffer)
- tree.xmlCharEncCloseFunc(enchandler)
- return result
-
-cdef _tounicode(_NodeBase element, int pretty_print):
- "Serialize an element to the Python unicode representation of its XML tree."
- cdef tree.xmlOutputBuffer* c_buffer
- cdef tree.xmlBuffer* c_result_buffer
- if element is None:
- return None
- c_buffer = tree.xmlAllocOutputBuffer(NULL)
- if c_buffer is NULL:
- raise LxmlError, "Failed to create output buffer"
- try:
- _writeNodeToBuffer(c_buffer, element._c_node, NULL, 0, pretty_print)
- tree.xmlOutputBufferFlush(c_buffer)
- if c_buffer.conv is not NULL:
- c_result_buffer = c_buffer.conv
- else:
- c_result_buffer = c_buffer.buffer
- result = python.PyUnicode_DecodeUTF8(
- tree.xmlBufferContent(c_result_buffer),
- tree.xmlBufferLength(c_result_buffer),
- 'strict')
- finally:
- tree.xmlOutputBufferClose(c_buffer)
- return result
-
-cdef void _writeNodeToBuffer(tree.xmlOutputBuffer* c_buffer,
- xmlNode* c_node, char* encoding,
- int write_xml_declaration, int pretty_print):
- cdef xmlDoc* c_doc
- c_doc = c_node.doc
- if write_xml_declaration:
- _writeDeclarationToBuffer(c_buffer, c_doc.version, encoding)
-
- tree.xmlNodeDumpOutput(c_buffer, c_doc, c_node, 0, pretty_print, encoding)
- _writeTail(c_buffer, c_node, encoding, pretty_print)
-
-cdef void _writeDeclarationToBuffer(tree.xmlOutputBuffer* c_buffer,
- char* version, char* encoding):
- if version is NULL:
- version = "1.0"
- tree.xmlOutputBufferWriteString(c_buffer, "\n")
-
-cdef void _writeTail(tree.xmlOutputBuffer* c_buffer, xmlNode* c_node,
- char* encoding, int pretty_print):
- "Write the element tail."
- c_node = c_node.next
- while c_node is not NULL and c_node.type == tree.XML_TEXT_NODE:
- tree.xmlNodeDumpOutput(c_buffer, c_node.doc, c_node, 0,
- pretty_print, encoding)
- c_node = c_node.next
-
-# output to file-like objects
-
-cdef class _FilelikeWriter:
- cdef object _filelike
- cdef _ExceptionContext _exc_context
- def __init__(self, filelike, exc_context=None):
- self._filelike = filelike
- if exc_context is None:
- self._exc_context = _ExceptionContext()
- else:
- self._exc_context = exc_context
-
- cdef tree.xmlOutputBuffer* _createOutputBuffer(
- self, tree.xmlCharEncodingHandler* enchandler) except NULL:
- cdef tree.xmlOutputBuffer* c_buffer
- c_buffer = tree.xmlOutputBufferCreateIO(
- _writeFilelikeWriter, _closeFilelikeWriter,
- self, enchandler)
- if c_buffer is NULL:
- raise IOError, "Could not create I/O writer context."
- return c_buffer
-
- cdef int write(self, char* c_buffer, int len):
- try:
- if self._filelike is None:
- raise IOError, "File is already closed"
- py_buffer = python.PyString_FromStringAndSize(c_buffer, len)
- self._filelike.write(py_buffer)
- return len
- except Exception:
- self._exc_context._store_raised()
- return -1
-
- cdef int close(self):
- # we should not close the file here as we didn't open it
- self._filelike = None
- return 0
-
-cdef int _writeFilelikeWriter(void* ctxt, char* c_buffer, int len):
- return (<_FilelikeWriter>ctxt).write(c_buffer, len)
-
-cdef int _closeFilelikeWriter(void* ctxt):
- return (<_FilelikeWriter>ctxt).close()
-
-cdef _tofilelike(f, _NodeBase element, encoding,
- int write_xml_declaration, int pretty_print):
- cdef _FilelikeWriter writer
- cdef tree.xmlOutputBuffer* c_buffer
- cdef tree.xmlCharEncodingHandler* enchandler
- cdef char* c_enc
- if encoding is None:
- c_enc = NULL
- else:
- c_enc = encoding
- enchandler = tree.xmlFindCharEncodingHandler(c_enc)
- if enchandler is NULL:
- raise LookupError, python.PyString_FromFormat(
- "unknown encoding: '%s'", c_enc)
-
- if python.PyString_Check(f) or python.PyUnicode_Check(f):
- filename = _utf8(f)
- c_buffer = tree.xmlOutputBufferCreateFilename(
- _cstr(filename), enchandler, 0)
- elif hasattr(f, 'write'):
- writer = _FilelikeWriter(f)
- c_buffer = writer._createOutputBuffer(enchandler)
- else:
- tree.xmlCharEncCloseFunc(enchandler)
- raise TypeError, "File or filename expected, got '%s'" % type(f)
-
- _writeNodeToBuffer(c_buffer, element._c_node, c_enc,
- write_xml_declaration, pretty_print)
-
- tree.xmlOutputBufferClose(c_buffer)
- tree.xmlCharEncCloseFunc(enchandler)
- if writer is not None:
- writer._exc_context._raise_if_stored()
-
-# dump node to file (mainly for debug)
-
-cdef _dumpToFile(f, xmlNode* c_node, int pretty_print):
- cdef tree.xmlOutputBuffer* c_buffer
- if not python.PyFile_Check(f):
- raise ValueError, "Not a file"
- c_buffer = tree.xmlOutputBufferCreateFile(python.PyFile_AsFile(f), NULL)
- tree.xmlNodeDumpOutput(c_buffer, c_node.doc, c_node, 0, pretty_print, NULL)
- _writeTail(c_buffer, c_node, NULL, 0)
- tree.xmlOutputBufferWriteString(c_buffer, '\n')
- tree.xmlOutputBufferFlush(c_buffer)
From scoder at codespeak.net Sun May 21 20:46:51 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Sun, 21 May 2006 20:46:51 +0200 (CEST)
Subject: [Lxml-checkins] r27544 - lxml/branch/xmlsave
Message-ID: <20060521184651.36C2310076@code0.codespeak.net>
Author: scoder
Date: Sun May 21 20:46:50 2006
New Revision: 27544
Added:
lxml/branch/xmlsave/
- copied from r27543, lxml/trunk/
Log:
new branch for libxml2 xmlsave output support (XMLFormatter)
From scoder at codespeak.net Sun May 21 21:46:45 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Sun, 21 May 2006 21:46:45 +0200 (CEST)
Subject: [Lxml-checkins] r27552 - in lxml/trunk: . src/lxml
Message-ID: <20060521194645.582A810076@code0.codespeak.net>
Author: scoder
Date: Sun May 21 21:46:43 2006
New Revision: 27552
Modified:
lxml/trunk/CHANGES.txt
lxml/trunk/src/lxml/xmlerror.pxi
Log:
added last_error attribute in _ErrorLog to access last error or fatal error, some cleanup in xmlerror.pxi
Modified: lxml/trunk/CHANGES.txt
==============================================================================
--- lxml/trunk/CHANGES.txt (original)
+++ lxml/trunk/CHANGES.txt Sun May 21 21:46:43 2006
@@ -7,6 +7,8 @@
Features added
--------------
+* Error logs now have a ``last_error`` attribute for convenience
+
* Comment texts can now be changed through the API
Bugs fixed
Modified: lxml/trunk/src/lxml/xmlerror.pxi
==============================================================================
--- lxml/trunk/src/lxml/xmlerror.pxi (original)
+++ lxml/trunk/src/lxml/xmlerror.pxi Sun May 21 21:46:43 2006
@@ -74,11 +74,13 @@
cdef class _BaseErrorLog:
"Immutable base version of an error log."
cdef object _entries
- def __init__(self, entries):
+ cdef readonly object last_error
+ def __init__(self, entries, last_error=None):
self._entries = entries
+ self.last_error = last_error
def copy(self):
- return _BaseErrorLog(self._entries)
+ return _BaseErrorLog(self._entries, self.last_error)
def __iter__(self):
return iter(self._entries)
@@ -145,19 +147,7 @@
"Convenience method to get all warnings or worse."
return self.filter_from_level(ErrorLevels.WARNING)
-cdef class _ErrorLog(_BaseErrorLog):
- def __init__(self):
- _BaseErrorLog.__init__(self, [])
-
- def clear(self):
- del self._entries[:]
-
- def copy(self):
- return _BaseErrorLog(self._entries[:])
-
- def __iter__(self):
- return iter(self._entries[:])
-
+cdef class _ExtensibleErrorLog(_BaseErrorLog):
cdef void connect(self):
del self._entries[:]
xmlerror.xmlSetStructuredErrorFunc(self, _receiveError)
@@ -166,12 +156,16 @@
xmlerror.xmlSetStructuredErrorFunc(NULL, _receiveError)
cdef void _receive(self, xmlerror.xmlError* error):
+ cdef int level
cdef _LogEntry entry
entry = _LogEntry()
entry._set(error)
if __GLOBAL_ERROR_LOG is not self:
__GLOBAL_ERROR_LOG.receive(entry)
self.receive(entry)
+ level = error.level
+ if level == xmlerror.XML_ERR_ERROR or level == xmlerror.XML_ERR_FATAL:
+ self.last_error = entry
cdef void _receiveGeneric(self, int domain, int type, int level, int line,
message, filename):
@@ -181,35 +175,52 @@
if __GLOBAL_ERROR_LOG is not self:
__GLOBAL_ERROR_LOG.receive(entry)
self.receive(entry)
+ if level == xmlerror.XML_ERR_ERROR or level == xmlerror.XML_ERR_FATAL:
+ self.last_error = entry
+
+cdef class _ErrorLog(_ExtensibleErrorLog):
+ def __init__(self):
+ _ExtensibleErrorLog.__init__(self, [])
+
+ def clear(self):
+ del self._entries[:]
+
+ def copy(self):
+ return _BaseErrorLog(self._entries[:], self.last_error)
+
+ def __iter__(self):
+ return iter(self._entries[:])
def receive(self, entry):
python.PyList_Append(self._entries, entry)
cdef class _DomainErrorLog(_ErrorLog):
- def receive(self, entry):
- if entry.domain in self._accepted_domains:
- _ErrorLog.receive(self, entry)
def __init__(self, domains):
_ErrorLog.__init__(self)
self._accepted_domains = tuple(domains)
+ def receive(self, entry):
+ if entry.domain in self._accepted_domains:
+ _ErrorLog.receive(self, entry)
+
cdef class _RotatingErrorLog(_ErrorLog):
cdef int _max_len
def __init__(self, max_len):
_ErrorLog.__init__(self)
self._max_len = max_len
+
def receive(self, entry):
entries = self._entries
if python.PyList_GET_SIZE(entries) > self._max_len:
del entries[0]
python.PyList_Append(entries, entry)
-cdef class PyErrorLog(_ErrorLog):
+cdef class PyErrorLog(_ExtensibleErrorLog):
cdef object _log
cdef object _level_map
cdef object _varsOf
def __init__(self, logger_name=None):
- _ErrorLog.__init__(self)
+ _ExtensibleErrorLog.__init__(self, [])
import logging
self._level_map = {
ErrorLevels.WARNING : logging.WARNING,
From scoder at codespeak.net Sun May 21 21:49:03 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Sun, 21 May 2006 21:49:03 +0200 (CEST)
Subject: [Lxml-checkins] r27553 - in lxml/branch/xmlsave: . src/lxml
Message-ID: <20060521194903.625B310076@code0.codespeak.net>
Author: scoder
Date: Sun May 21 21:49:01 2006
New Revision: 27553
Modified:
lxml/branch/xmlsave/CHANGES.txt
lxml/branch/xmlsave/src/lxml/xmlerror.pxi
Log:
merged in ErrorLog.last_error support from trunk
Modified: lxml/branch/xmlsave/CHANGES.txt
==============================================================================
--- lxml/branch/xmlsave/CHANGES.txt (original)
+++ lxml/branch/xmlsave/CHANGES.txt Sun May 21 21:49:01 2006
@@ -7,6 +7,8 @@
Features added
--------------
+* Error logs now have a ``last_error`` attribute for convenience
+
* Comment texts can now be changed through the API
Bugs fixed
Modified: lxml/branch/xmlsave/src/lxml/xmlerror.pxi
==============================================================================
--- lxml/branch/xmlsave/src/lxml/xmlerror.pxi (original)
+++ lxml/branch/xmlsave/src/lxml/xmlerror.pxi Sun May 21 21:49:01 2006
@@ -74,11 +74,13 @@
cdef class _BaseErrorLog:
"Immutable base version of an error log."
cdef object _entries
- def __init__(self, entries):
+ cdef readonly object last_error
+ def __init__(self, entries, last_error=None):
self._entries = entries
+ self.last_error = last_error
def copy(self):
- return _BaseErrorLog(self._entries)
+ return _BaseErrorLog(self._entries, self.last_error)
def __iter__(self):
return iter(self._entries)
@@ -145,19 +147,7 @@
"Convenience method to get all warnings or worse."
return self.filter_from_level(ErrorLevels.WARNING)
-cdef class _ErrorLog(_BaseErrorLog):
- def __init__(self):
- _BaseErrorLog.__init__(self, [])
-
- def clear(self):
- del self._entries[:]
-
- def copy(self):
- return _BaseErrorLog(self._entries[:])
-
- def __iter__(self):
- return iter(self._entries[:])
-
+cdef class _ExtensibleErrorLog(_BaseErrorLog):
cdef void connect(self):
del self._entries[:]
xmlerror.xmlSetStructuredErrorFunc(self, _receiveError)
@@ -166,12 +156,16 @@
xmlerror.xmlSetStructuredErrorFunc(NULL, _receiveError)
cdef void _receive(self, xmlerror.xmlError* error):
+ cdef int level
cdef _LogEntry entry
entry = _LogEntry()
entry._set(error)
if __GLOBAL_ERROR_LOG is not self:
__GLOBAL_ERROR_LOG.receive(entry)
self.receive(entry)
+ level = error.level
+ if level == xmlerror.XML_ERR_ERROR or level == xmlerror.XML_ERR_FATAL:
+ self.last_error = entry
cdef void _receiveGeneric(self, int domain, int type, int level, int line,
message, filename):
@@ -181,35 +175,52 @@
if __GLOBAL_ERROR_LOG is not self:
__GLOBAL_ERROR_LOG.receive(entry)
self.receive(entry)
+ if level == xmlerror.XML_ERR_ERROR or level == xmlerror.XML_ERR_FATAL:
+ self.last_error = entry
+
+cdef class _ErrorLog(_ExtensibleErrorLog):
+ def __init__(self):
+ _ExtensibleErrorLog.__init__(self, [])
+
+ def clear(self):
+ del self._entries[:]
+
+ def copy(self):
+ return _BaseErrorLog(self._entries[:], self.last_error)
+
+ def __iter__(self):
+ return iter(self._entries[:])
def receive(self, entry):
python.PyList_Append(self._entries, entry)
cdef class _DomainErrorLog(_ErrorLog):
- def receive(self, entry):
- if entry.domain in self._accepted_domains:
- _ErrorLog.receive(self, entry)
def __init__(self, domains):
_ErrorLog.__init__(self)
self._accepted_domains = tuple(domains)
+ def receive(self, entry):
+ if entry.domain in self._accepted_domains:
+ _ErrorLog.receive(self, entry)
+
cdef class _RotatingErrorLog(_ErrorLog):
cdef int _max_len
def __init__(self, max_len):
_ErrorLog.__init__(self)
self._max_len = max_len
+
def receive(self, entry):
entries = self._entries
if python.PyList_GET_SIZE(entries) > self._max_len:
del entries[0]
python.PyList_Append(entries, entry)
-cdef class PyErrorLog(_ErrorLog):
+cdef class PyErrorLog(_ExtensibleErrorLog):
cdef object _log
cdef object _level_map
cdef object _varsOf
def __init__(self, logger_name=None):
- _ErrorLog.__init__(self)
+ _ExtensibleErrorLog.__init__(self, [])
import logging
self._level_map = {
ErrorLevels.WARNING : logging.WARNING,
From scoder at codespeak.net Sun May 21 22:00:14 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Sun, 21 May 2006 22:00:14 +0200 (CEST)
Subject: [Lxml-checkins] r27554 - in lxml/trunk: doc src/lxml
Message-ID: <20060521200014.55DF110075@code0.codespeak.net>
Author: scoder
Date: Sun May 21 22:00:13 2006
New Revision: 27554
Modified:
lxml/trunk/doc/api.txt
lxml/trunk/src/lxml/xmlerror.pxi
Log:
doctest and bug fix for last_error attribute
Modified: lxml/trunk/doc/api.txt
==============================================================================
--- lxml/trunk/doc/api.txt (original)
+++ lxml/trunk/doc/api.txt Sun May 21 22:00:13 2006
@@ -122,6 +122,13 @@
>>> print entry.domain_name, entry.type_name, entry.filename
PARSER ERR_TAG_NOT_FINISHED
+There is also a convenience attribute ``last_error`` that returns the last
+error::
+
+ >>> entry = e.error_log.last_error
+ >>> print entry.domain_name, entry.type_name, entry.filename
+ PARSER ERR_TAG_NOT_FINISHED
+
Python unicode strings
----------------------
Modified: lxml/trunk/src/lxml/xmlerror.pxi
==============================================================================
--- lxml/trunk/src/lxml/xmlerror.pxi (original)
+++ lxml/trunk/src/lxml/xmlerror.pxi Sun May 21 22:00:13 2006
@@ -156,15 +156,18 @@
xmlerror.xmlSetStructuredErrorFunc(NULL, _receiveError)
cdef void _receive(self, xmlerror.xmlError* error):
- cdef int level
+ cdef int is_error
cdef _LogEntry entry
entry = _LogEntry()
entry._set(error)
+ is_error = error.level == xmlerror.XML_ERR_ERROR or \
+ error.level == xmlerror.XML_ERR_FATAL
if __GLOBAL_ERROR_LOG is not self:
__GLOBAL_ERROR_LOG.receive(entry)
+ if is_error:
+ __GLOBAL_ERROR_LOG.last_error = entry
self.receive(entry)
- level = error.level
- if level == xmlerror.XML_ERR_ERROR or level == xmlerror.XML_ERR_FATAL:
+ if is_error:
self.last_error = entry
cdef void _receiveGeneric(self, int domain, int type, int level, int line,
@@ -172,10 +175,14 @@
cdef _LogEntry entry
entry = _LogEntry()
entry._setGeneric(domain, type, level, line, message, filename)
+ is_error = level == xmlerror.XML_ERR_ERROR or \
+ level == xmlerror.XML_ERR_FATAL
if __GLOBAL_ERROR_LOG is not self:
__GLOBAL_ERROR_LOG.receive(entry)
+ if is_error:
+ __GLOBAL_ERROR_LOG.last_error = entry
self.receive(entry)
- if level == xmlerror.XML_ERR_ERROR or level == xmlerror.XML_ERR_FATAL:
+ if is_error:
self.last_error = entry
cdef class _ErrorLog(_ExtensibleErrorLog):
From scoder at codespeak.net Sun May 21 22:00:52 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Sun, 21 May 2006 22:00:52 +0200 (CEST)
Subject: [Lxml-checkins] r27555 - in lxml/branch/xmlsave: doc src/lxml
Message-ID: <20060521200052.7D0B610075@code0.codespeak.net>
Author: scoder
Date: Sun May 21 22:00:51 2006
New Revision: 27555
Modified:
lxml/branch/xmlsave/doc/api.txt
lxml/branch/xmlsave/src/lxml/xmlerror.pxi
Log:
merged in bug fix for ErrorLog.last_error support from trunk
Modified: lxml/branch/xmlsave/doc/api.txt
==============================================================================
--- lxml/branch/xmlsave/doc/api.txt (original)
+++ lxml/branch/xmlsave/doc/api.txt Sun May 21 22:00:51 2006
@@ -122,6 +122,13 @@
>>> print entry.domain_name, entry.type_name, entry.filename
PARSER ERR_TAG_NOT_FINISHED
+There is also a convenience attribute ``last_error`` that returns the last
+error::
+
+ >>> entry = e.error_log.last_error
+ >>> print entry.domain_name, entry.type_name, entry.filename
+ PARSER ERR_TAG_NOT_FINISHED
+
Python unicode strings
----------------------
Modified: lxml/branch/xmlsave/src/lxml/xmlerror.pxi
==============================================================================
--- lxml/branch/xmlsave/src/lxml/xmlerror.pxi (original)
+++ lxml/branch/xmlsave/src/lxml/xmlerror.pxi Sun May 21 22:00:51 2006
@@ -156,15 +156,18 @@
xmlerror.xmlSetStructuredErrorFunc(NULL, _receiveError)
cdef void _receive(self, xmlerror.xmlError* error):
- cdef int level
+ cdef int is_error
cdef _LogEntry entry
entry = _LogEntry()
entry._set(error)
+ is_error = error.level == xmlerror.XML_ERR_ERROR or \
+ error.level == xmlerror.XML_ERR_FATAL
if __GLOBAL_ERROR_LOG is not self:
__GLOBAL_ERROR_LOG.receive(entry)
+ if is_error:
+ __GLOBAL_ERROR_LOG.last_error = entry
self.receive(entry)
- level = error.level
- if level == xmlerror.XML_ERR_ERROR or level == xmlerror.XML_ERR_FATAL:
+ if is_error:
self.last_error = entry
cdef void _receiveGeneric(self, int domain, int type, int level, int line,
@@ -172,10 +175,14 @@
cdef _LogEntry entry
entry = _LogEntry()
entry._setGeneric(domain, type, level, line, message, filename)
+ is_error = level == xmlerror.XML_ERR_ERROR or \
+ level == xmlerror.XML_ERR_FATAL
if __GLOBAL_ERROR_LOG is not self:
__GLOBAL_ERROR_LOG.receive(entry)
+ if is_error:
+ __GLOBAL_ERROR_LOG.last_error = entry
self.receive(entry)
- if level == xmlerror.XML_ERR_ERROR or level == xmlerror.XML_ERR_FATAL:
+ if is_error:
self.last_error = entry
cdef class _ErrorLog(_ExtensibleErrorLog):
From scoder at codespeak.net Sun May 21 22:54:11 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Sun, 21 May 2006 22:54:11 +0200 (CEST)
Subject: [Lxml-checkins] r27556 - lxml/branch/xmlsave/src/lxml
Message-ID: <20060521205411.15D5A10076@code0.codespeak.net>
Author: scoder
Date: Sun May 21 22:54:10 2006
New Revision: 27556
Modified:
lxml/branch/xmlsave/src/lxml/etree.pyx
lxml/branch/xmlsave/src/lxml/serializer.pxi
lxml/branch/xmlsave/src/lxml/tree.pxd
Log:
first shot on XMLFormatter, two entity encoding related test cases still fail
Modified: lxml/branch/xmlsave/src/lxml/etree.pyx
==============================================================================
--- lxml/branch/xmlsave/src/lxml/etree.pyx (original)
+++ lxml/branch/xmlsave/src/lxml/etree.pyx Sun May 21 22:54:10 2006
@@ -361,7 +361,7 @@
def __get__(self):
return DocInfo(self._doc)
- def write(self, file, encoding=None, pretty_print=False):
+ def write(self, file, encoding=None, formatter=None):
"""Write the tree to a file or file-like object.
Defaults to ASCII encoding.
@@ -375,8 +375,7 @@
encoding = encoding.upper()
write_declaration = encoding not in \
('US-ASCII', 'ASCII', 'UTF8', 'UTF-8')
- _tofilelike(file, self._context_node, encoding,
- write_declaration, bool(pretty_print))
+ _tofilelike(file, self._context_node, encoding, formatter)
def getiterator(self, tag=None):
root = self.getroot()
Modified: lxml/branch/xmlsave/src/lxml/serializer.pxi
==============================================================================
--- lxml/branch/xmlsave/src/lxml/serializer.pxi (original)
+++ lxml/branch/xmlsave/src/lxml/serializer.pxi Sun May 21 22:54:10 2006
@@ -2,6 +2,83 @@
tree.xmlKeepBlanksDefault(0)
+class XMLOutputError(LxmlError):
+ pass
+
+cdef class XMLFormatter:
+ cdef int _pretty_print
+ cdef int _write_declaration
+ cdef int _save_options
+ cdef _ErrorLog _error_log
+
+ def __init__(self, pretty_print=False, write_declaration=None,
+ split_empty_tags=False):
+ cdef int save_options
+ save_options = tree.XML_SAVE_NO_XHTML
+
+ if pretty_print:
+ self._pretty_print = True
+ save_options = save_options | tree.XML_SAVE_FORMAT
+ else:
+ self._pretty_print = False
+
+ if split_empty_tags:
+ save_options = save_options | tree.XML_SAVE_NO_EMPTY
+
+ if write_declaration is None:
+ self._write_declaration = -1
+ elif write_declaration:
+ self._write_declaration = True
+ else:
+ self._write_declaration = False
+ save_options = save_options | tree.XML_SAVE_NO_DECL
+
+ self._save_options = save_options
+ self._error_log = _ErrorLog()
+
+ property error_log:
+ def __get__(self):
+ return self._error_log.copy()
+
+ cdef int _optionsForEncoding(self, encoding):
+ cdef int save_options
+ if self._write_declaration != -1:
+ return self._save_options
+ # purely for ElementTree compatibility: suppress decl. in default cases
+ save_options = self._save_options
+ if encoding is None:
+ save_options = save_options | tree.XML_SAVE_NO_DECL
+ elif encoding.upper() in ('US-ASCII', 'ASCII', 'UTF8', 'UTF-8'):
+ save_options = save_options | tree.XML_SAVE_NO_DECL
+ return save_options
+
+ cdef int _saveNode(self, tree.xmlSaveCtxt* save_ctxt,
+ xmlNode* c_node) except -1:
+ cdef long result
+ self._error_log.connect()
+ result = tree.xmlSaveTree(save_ctxt, c_node)
+ tree.xmlSaveClose(save_ctxt)
+ self._error_log.disconnect()
+ if result < 0:
+ error = self._error_log.last_error
+ if error is not None:
+ if error.domain == xmlerror.XML_FROM_IO:
+ raise IOError, error.message
+ else:
+ raise XMLOutputError, error.message
+ else:
+ raise XMLOutputError, "Error serializing the tree"
+ else:
+ return 0
+
+cdef class XHTMLFormatter(XMLFormatter):
+ def __init__(self, **kwargs):
+ XMLFormatter.__init__(self, **kwargs)
+ self._save_options = self._save_options & (~tree.XML_SAVE_NO_XHTML)
+
+cdef XMLFormatter __DEFAULT_XML_FORMATTER
+__DEFAULT_XML_FORMATTER = XMLFormatter()
+
cdef _tostring(_NodeBase element, encoding,
int write_xml_declaration, int pretty_print):
"Serialize an element to an encoded string representation of its XML tree."
@@ -119,6 +196,24 @@
raise IOError, "Could not create I/O writer context."
return c_buffer
+ cdef tree.xmlSaveCtxt* _createSaveContext(
+ self, char* encoding, int save_options) except NULL:
+ cdef tree.xmlCharEncodingHandler* enchandler
+ cdef tree.xmlSaveCtxt* c_ctxt
+ c_ctxt = tree.xmlSaveToIO(
+ _writeFilelikeWriter, _closeFilelikeWriter,
+ self, encoding, save_options)
+ if c_ctxt is NULL:
+ # this is only done to check if we knew the encoding
+ enchandler = tree.xmlFindCharEncodingHandler(encoding)
+ if enchandler is NULL:
+ raise LookupError, python.PyString_FromFormat(
+ "unknown encoding: '%s'", encoding)
+ else:
+ tree.xmlCharEncCloseFunc(enchandler)
+ raise IOError, "Could not create I/O writer context."
+ return c_ctxt
+
cdef int write(self, char* c_buffer, int len):
try:
if self._filelike is None:
@@ -141,37 +236,31 @@
cdef int _closeFilelikeWriter(void* ctxt):
return (<_FilelikeWriter>ctxt).close()
-cdef _tofilelike(f, _NodeBase element, encoding,
- int write_xml_declaration, int pretty_print):
+cdef _tofilelike(f, _NodeBase element, encoding, XMLFormatter formatter):
cdef _FilelikeWriter writer
- cdef tree.xmlOutputBuffer* c_buffer
- cdef tree.xmlCharEncodingHandler* enchandler
+ cdef tree.xmlSaveCtxt* save_ctxt
cdef char* c_enc
+ cdef int save_options
if encoding is None:
c_enc = NULL
else:
+ encoding = encoding.upper()
c_enc = encoding
- enchandler = tree.xmlFindCharEncodingHandler(c_enc)
- if enchandler is NULL:
- raise LookupError, python.PyString_FromFormat(
- "unknown encoding: '%s'", c_enc)
+ if formatter is None:
+ formatter = __DEFAULT_XML_FORMATTER
+ save_options = formatter._optionsForEncoding(encoding)
if python.PyString_Check(f) or python.PyUnicode_Check(f):
- filename = _utf8(f)
- c_buffer = tree.xmlOutputBufferCreateFilename(
- _cstr(filename), enchandler, 0)
+ filename = _utf8(f)
+ save_ctxt = tree.xmlSaveToFilename(
+ _cstr(filename), c_enc, save_options)
elif hasattr(f, 'write'):
- writer = _FilelikeWriter(f)
- c_buffer = writer._createOutputBuffer(enchandler)
+ writer = _FilelikeWriter(f)
+ save_ctxt = writer._createSaveContext(c_enc, save_options)
else:
- tree.xmlCharEncCloseFunc(enchandler)
raise TypeError, "File or filename expected, got '%s'" % type(f)
- _writeNodeToBuffer(c_buffer, element._c_node, c_enc,
- write_xml_declaration, pretty_print)
-
- tree.xmlOutputBufferClose(c_buffer)
- tree.xmlCharEncCloseFunc(enchandler)
+ formatter._saveNode(save_ctxt, element._c_node)
if writer is not None:
writer._exc_context._raise_if_stored()
Modified: lxml/branch/xmlsave/src/lxml/tree.pxd
==============================================================================
--- lxml/branch/xmlsave/src/lxml/tree.pxd (original)
+++ lxml/branch/xmlsave/src/lxml/tree.pxd Sun May 21 22:54:10 2006
@@ -227,13 +227,26 @@
FILE* file, xmlCharEncodingHandler* encoder)
cdef xmlOutputBuffer* xmlOutputBufferCreateFilename(
char* URI, xmlCharEncodingHandler* encoder, int compression)
+
cdef extern from "libxml/xmlsave.h":
+ ctypedef enum xmlSaveOption:
+ XML_SAVE_FORMAT = 1 # format save output
+ XML_SAVE_NO_DECL = 2 # drop the xml declaration
+ XML_SAVE_NO_EMPTY = 4 # no empty tags
+ XML_SAVE_NO_XHTML = 8 # disable XHTML1 specific rules
+
ctypedef struct xmlSaveCtxt:
pass
cdef xmlSaveCtxt* xmlSaveToFilename(char* filename, char* encoding,
int options)
+ cdef xmlSaveCtxt* xmlSaveToBuffer(xmlBuffer* buffer, char* encoding,
+ int options)
+ cdef xmlSaveCtxt* xmlSaveToIO(xmlOutputWriteCallback iowrite,
+ xmlOutputCloseCallback ioclose,
+ void* ioctx, char* encoding, int options)
cdef long xmlSaveDoc(xmlSaveCtxt* ctxt, xmlDoc* doc)
+ cdef long xmlSaveTree(xmlSaveCtxt* ctxt, xmlNode* node)
cdef int xmlSaveClose(xmlSaveCtxt* ctxt)
cdef extern from "libxml/xmlstring.h":
From scoder at codespeak.net Mon May 22 07:16:25 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Mon, 22 May 2006 07:16:25 +0200 (CEST)
Subject: [Lxml-checkins] r27572 - lxml/branch/xmlsave/src/lxml/tests
Message-ID: <20060522051625.CCB6010071@code0.codespeak.net>
Author: scoder
Date: Mon May 22 07:16:24 2006
New Revision: 27572
Modified:
lxml/branch/xmlsave/src/lxml/tests/test_elementtree.py
Log:
fix prologue testing in test_elementtree.py (ET only special cases 'utf-8' encoding name, not 'UTF-8')
Modified: lxml/branch/xmlsave/src/lxml/tests/test_elementtree.py
==============================================================================
--- lxml/branch/xmlsave/src/lxml/tests/test_elementtree.py (original)
+++ lxml/branch/xmlsave/src/lxml/tests/test_elementtree.py Mon May 22 07:16:24 2006
@@ -1683,8 +1683,8 @@
a.text = u'S?k p? nettet'
self.assertXML(
u'S?k p? nettet '.encode('UTF-8'),
- a, 'UTF-8')
-
+ a, 'utf-8')
+
def test_encoding2(self):
ElementTree = self.etree.ElementTree
Element = self.etree.Element
@@ -1694,13 +1694,9 @@
f = StringIO()
tree = ElementTree(element=a)
- tree.write(f, 'UTF-8')
- data = f.getvalue()
-
- # XXX prologue generation seems to be inconsistent between libraries..
- xml = u'S?k p? nettet '.encode('UTF-8')
- prologue = u'\n'.encode('UTF-8')
- self.assert_(data in [xml, prologue + xml])
+ tree.write(f, 'utf-8')
+ self.assertEqual(u'Søk på nettet '.encode('UTF-8'),
+ f.getvalue())
## # ignore wrong (left-over?) encoding declaration in unicode strings
## def _test_wrong_unicode_encoding(self):
@@ -1731,13 +1727,10 @@
Element = self.etree.Element
tostring = self.etree.tostring
- # XXX prologue generation seems to be inconsistent between libraries..
- xml = u'S?k p? nettet '.encode('UTF-8')
- prologue = u'\n'.encode('UTF-8')
-
a = Element('a')
a.text = u'S?k p? nettet'
- self.assert_(tostring(a, 'UTF-8') in [xml, prologue + xml])
+ self.assertEqual(u'Søk på nettet '.encode('UTF-8'),
+ tostring(a, 'utf-8'))
def test_encoding_tostring_unknown(self):
Element = self.etree.Element
@@ -1755,11 +1748,8 @@
a = Element('a')
b = SubElement(a, 'b')
b.text = u'S?k p? nettet'
-
- # XXX prologue generation seems to be inconsistent between libraries..
- xml = u'S?k p? nettet '.encode('UTF-8')
- prologue = u'\n'.encode('UTF-8')
- self.assert_(tostring(b, 'UTF-8') in [xml, prologue + xml])
+ self.assertEqual(u'Søk på nettet '.encode('UTF-8'),
+ tostring(b, 'utf-8'))
def test_encoding_tostring_sub_tail(self):
Element = self.etree.Element
@@ -1770,9 +1760,8 @@
b = SubElement(a, 'b')
b.text = u'S?k p? nettet'
b.tail = u'S?k'
- xml = u'S?k p? nettet S?k'.encode('UTF-8')
- prologue = u'\n'.encode('UTF-8')
- self.assert_(tostring(b, 'UTF-8') in [xml, prologue + xml])
+ self.assertEqual(u'Søk på nettet Søk'.encode('UTF-8'),
+ tostring(b, 'utf-8'))
def test_encoding_tostring_default_encoding(self):
Element = self.etree.Element
From scoder at codespeak.net Mon May 22 07:17:11 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Mon, 22 May 2006 07:17:11 +0200 (CEST)
Subject: [Lxml-checkins] r27573 - lxml/trunk/src/lxml/tests
Message-ID: <20060522051711.11B7310071@code0.codespeak.net>
Author: scoder
Date: Mon May 22 07:17:09 2006
New Revision: 27573
Modified:
lxml/trunk/src/lxml/tests/test_elementtree.py
Log:
fix prologue testing in test_elementtree.py (ET only special cases 'utf-8' encoding name, not 'UTF-8')
Modified: lxml/trunk/src/lxml/tests/test_elementtree.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_elementtree.py (original)
+++ lxml/trunk/src/lxml/tests/test_elementtree.py Mon May 22 07:17:09 2006
@@ -1683,8 +1683,8 @@
a.text = u'S?k p? nettet'
self.assertXML(
u'S?k p? nettet '.encode('UTF-8'),
- a, 'UTF-8')
-
+ a, 'utf-8')
+
def test_encoding2(self):
ElementTree = self.etree.ElementTree
Element = self.etree.Element
@@ -1694,13 +1694,9 @@
f = StringIO()
tree = ElementTree(element=a)
- tree.write(f, 'UTF-8')
- data = f.getvalue()
-
- # XXX prologue generation seems to be inconsistent between libraries..
- xml = u'S?k p? nettet '.encode('UTF-8')
- prologue = u'\n'.encode('UTF-8')
- self.assert_(data in [xml, prologue + xml])
+ tree.write(f, 'utf-8')
+ self.assertEqual(u'Søk på nettet '.encode('UTF-8'),
+ f.getvalue())
## # ignore wrong (left-over?) encoding declaration in unicode strings
## def _test_wrong_unicode_encoding(self):
@@ -1731,13 +1727,10 @@
Element = self.etree.Element
tostring = self.etree.tostring
- # XXX prologue generation seems to be inconsistent between libraries..
- xml = u'S?k p? nettet '.encode('UTF-8')
- prologue = u'\n'.encode('UTF-8')
-
a = Element('a')
a.text = u'S?k p? nettet'
- self.assert_(tostring(a, 'UTF-8') in [xml, prologue + xml])
+ self.assertEqual(u'Søk på nettet '.encode('UTF-8'),
+ tostring(a, 'utf-8'))
def test_encoding_tostring_unknown(self):
Element = self.etree.Element
@@ -1755,11 +1748,8 @@
a = Element('a')
b = SubElement(a, 'b')
b.text = u'S?k p? nettet'
-
- # XXX prologue generation seems to be inconsistent between libraries..
- xml = u'S?k p? nettet '.encode('UTF-8')
- prologue = u'\n'.encode('UTF-8')
- self.assert_(tostring(b, 'UTF-8') in [xml, prologue + xml])
+ self.assertEqual(u'Søk på nettet '.encode('UTF-8'),
+ tostring(b, 'utf-8'))
def test_encoding_tostring_sub_tail(self):
Element = self.etree.Element
@@ -1770,9 +1760,8 @@
b = SubElement(a, 'b')
b.text = u'S?k p? nettet'
b.tail = u'S?k'
- xml = u'S?k p? nettet S?k'.encode('UTF-8')
- prologue = u'\n'.encode('UTF-8')
- self.assert_(tostring(b, 'UTF-8') in [xml, prologue + xml])
+ self.assertEqual(u'Søk på nettet Søk'.encode('UTF-8'),
+ tostring(b, 'utf-8'))
def test_encoding_tostring_default_encoding(self):
Element = self.etree.Element
From scoder at codespeak.net Mon May 22 07:23:35 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Mon, 22 May 2006 07:23:35 +0200 (CEST)
Subject: [Lxml-checkins] r27574 - lxml/trunk/src/lxml/tests
Message-ID: <20060522052335.2A92F10071@code0.codespeak.net>
Author: scoder
Date: Mon May 22 07:23:34 2006
New Revision: 27574
Modified:
lxml/trunk/src/lxml/tests/test_elementtree.py
Log:
test raising parser error on XML declaration in unicode strings
Modified: lxml/trunk/src/lxml/tests/test_elementtree.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_elementtree.py (original)
+++ lxml/trunk/src/lxml/tests/test_elementtree.py Mon May 22 07:23:34 2006
@@ -1698,16 +1698,12 @@
self.assertEqual(u'S?k p? nettet '.encode('UTF-8'),
f.getvalue())
-## # ignore wrong (left-over?) encoding declaration in unicode strings
-## def _test_wrong_unicode_encoding(self):
-## XML = self.etree.XML
-
-## test_utf = u'S?k p? nettet '
-## parsed = XML(test_utf)
-## self.assertXML(
-## u'S?k p? nettet '.encode('UTF-8'),
-## parsed, 'UTF-8')
-
+ # raise error on wrong (left-over?) encoding declaration in unicode strings
+ def _test_wrong_unicode_encoding(self):
+ XML = self.etree.XML
+ test_utf = u'Søk på nettet '
+ self.assertRaises(SyntaxError, XML, test_utf)
+
def test_encoding_default_encoding(self):
ElementTree = self.etree.ElementTree
Element = self.etree.Element
From scoder at codespeak.net Mon May 22 07:24:59 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Mon, 22 May 2006 07:24:59 +0200 (CEST)
Subject: [Lxml-checkins] r27575 - lxml/branch/xmlsave/src/lxml/tests
Message-ID: <20060522052459.25E1910071@code0.codespeak.net>
Author: scoder
Date: Mon May 22 07:24:58 2006
New Revision: 27575
Modified:
lxml/branch/xmlsave/src/lxml/tests/test_elementtree.py
Log:
merged in test case from trunk
Modified: lxml/branch/xmlsave/src/lxml/tests/test_elementtree.py
==============================================================================
--- lxml/branch/xmlsave/src/lxml/tests/test_elementtree.py (original)
+++ lxml/branch/xmlsave/src/lxml/tests/test_elementtree.py Mon May 22 07:24:58 2006
@@ -1698,16 +1698,12 @@
self.assertEqual(u'S?k p? nettet '.encode('UTF-8'),
f.getvalue())
-## # ignore wrong (left-over?) encoding declaration in unicode strings
-## def _test_wrong_unicode_encoding(self):
-## XML = self.etree.XML
-
-## test_utf = u'S?k p? nettet '
-## parsed = XML(test_utf)
-## self.assertXML(
-## u'S?k p? nettet '.encode('UTF-8'),
-## parsed, 'UTF-8')
-
+ # raise error on wrong (left-over?) encoding declaration in unicode strings
+ def _test_wrong_unicode_encoding(self):
+ XML = self.etree.XML
+ test_utf = u'Søk på nettet '
+ self.assertRaises(SyntaxError, XML, test_utf)
+
def test_encoding_default_encoding(self):
ElementTree = self.etree.ElementTree
Element = self.etree.Element
From scoder at codespeak.net Mon May 22 07:48:37 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Mon, 22 May 2006 07:48:37 +0200 (CEST)
Subject: [Lxml-checkins] r27576 - lxml/branch/xmlsave/src/lxml
Message-ID: <20060522054837.572F710071@code0.codespeak.net>
Author: scoder
Date: Mon May 22 07:48:36 2006
New Revision: 27576
Modified:
lxml/branch/xmlsave/src/lxml/serializer.pxi
Log:
doc string
Modified: lxml/branch/xmlsave/src/lxml/serializer.pxi
==============================================================================
--- lxml/branch/xmlsave/src/lxml/serializer.pxi (original)
+++ lxml/branch/xmlsave/src/lxml/serializer.pxi Mon May 22 07:48:36 2006
@@ -41,10 +41,11 @@
return self._error_log.copy()
cdef int _optionsForEncoding(self, encoding):
+ """Purely for ElementTree compatibility: suppress XML declaration in
+ default cases."""
cdef int save_options
if self._write_declaration != -1:
return self._save_options
- # purely for ElementTree compatibility: suppress decl. in default cases
save_options = self._save_options
if encoding is None:
save_options = save_options | tree.XML_SAVE_NO_DECL
From scoder at codespeak.net Mon May 22 08:43:16 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Mon, 22 May 2006 08:43:16 +0200 (CEST)
Subject: [Lxml-checkins] r27577 - lxml/branch/xmlsave/src/lxml/tests
Message-ID: <20060522064316.51C8610071@code0.codespeak.net>
Author: scoder
Date: Mon May 22 08:43:14 2006
New Revision: 27577
Modified:
lxml/branch/xmlsave/src/lxml/tests/test_elementtree.py
lxml/branch/xmlsave/src/lxml/tests/test_htmlparser.py
lxml/branch/xmlsave/src/lxml/tests/test_io.py
Log:
prevent test cases from leaking temp files, some cleanup in test_elementtree.py
Modified: lxml/branch/xmlsave/src/lxml/tests/test_elementtree.py
==============================================================================
--- lxml/branch/xmlsave/src/lxml/tests/test_elementtree.py (original)
+++ lxml/branch/xmlsave/src/lxml/tests/test_elementtree.py Mon May 22 08:43:14 2006
@@ -1676,7 +1676,6 @@
)
def test_encoding(self):
- ElementTree = self.etree.ElementTree
Element = self.etree.Element
a = Element('a')
@@ -1704,7 +1703,7 @@
test_utf = u'S?k p? nettet '
self.assertRaises(SyntaxError, XML, test_utf)
- def test_encoding_default_encoding(self):
+ def test_encoding_write_default_encoding(self):
ElementTree = self.etree.ElementTree
Element = self.etree.Element
@@ -1716,7 +1715,7 @@
tree.write(f)
data = f.getvalue()
self.assertEquals(
- 'Søk på nettet ',
+ u'Søk på nettet '.encode('ASCII', 'xmlcharrefreplace'),
data)
def test_encoding_tostring(self):
@@ -1783,8 +1782,9 @@
# the same, just hex versus decimal
expected = 'Søk på nettet '
- expected2 = 'Søk på nettet '
- self.assert_(tostring(b) in [expected, expected2])
+ self.assertEquals(
+ expected,
+ tostring(b))
def test_deepcopy(self):
Element = self.etree.Element
@@ -1885,15 +1885,17 @@
"""
ElementTree = self.etree.ElementTree
handle, filename = tempfile.mkstemp()
- f = open(filename, 'wb')
- tree = ElementTree(element=element)
- tree.write(f, encoding)
- f.close()
- f = open(filename, 'rb')
- data = unicode(f.read(), encoding)
- f.close()
- os.close(handle)
- os.remove(filename)
+ try:
+ f = open(filename, 'wb')
+ tree = ElementTree(element=element)
+ tree.write(f, encoding)
+ f.close()
+ f = open(filename, 'rb')
+ data = unicode(f.read(), encoding)
+ f.close()
+ finally:
+ os.close(handle)
+ os.remove(filename)
return canonicalize(data)
def assertXML(self, expected, element, encoding='us-ascii'):
Modified: lxml/branch/xmlsave/src/lxml/tests/test_htmlparser.py
==============================================================================
--- lxml/branch/xmlsave/src/lxml/tests/test_htmlparser.py (original)
+++ lxml/branch/xmlsave/src/lxml/tests/test_htmlparser.py Mon May 22 08:43:14 2006
@@ -5,7 +5,7 @@
"""
import unittest
-import tempfile
+import tempfile, os
from common_imports import StringIO, etree, fileInTestDir
from common_imports import SillyFileLike, HelperTestCase
@@ -59,9 +59,13 @@
parser = self.etree.HTMLParser()
filename = tempfile.mktemp(suffix=".html")
open(filename, 'wb').write(self.html_str)
- f = open(filename, 'r')
- tree = self.etree.parse(f, parser)
- self.assertEqual(self.etree.tostring(tree.getroot()), self.html_str)
+ try:
+ f = open(filename, 'r')
+ tree = self.etree.parse(f, parser)
+ f.close()
+ self.assertEqual(self.etree.tostring(tree.getroot()), self.html_str)
+ finally:
+ os.remove(filename)
def test_module_parse_html_filelike(self):
parser = self.etree.HTMLParser()
Modified: lxml/branch/xmlsave/src/lxml/tests/test_io.py
==============================================================================
--- lxml/branch/xmlsave/src/lxml/tests/test_io.py (original)
+++ lxml/branch/xmlsave/src/lxml/tests/test_io.py Mon May 22 08:43:14 2006
@@ -5,7 +5,7 @@
"""
import unittest
-import tempfile, gzip
+import tempfile, gzip, os
from common_imports import etree, ElementTree, fileInTestDir
from common_imports import SillyFileLike, LargeFileLike
@@ -27,15 +27,22 @@
filename = tempfile.mktemp(suffix=".xml")
self.tree.write(filename)
- self.assertEqual(open(filename).read(), self.root_str)
+ try:
+ self.assertEqual(open(filename).read(), self.root_str)
+ finally:
+ os.remove(filename)
def test_module_parse_gzipobject(self):
# (c)ElementTree supports gzip instance as parse argument
filename = tempfile.mktemp(suffix=".xml.gz")
gzip.open(filename, 'wb').write(self.root_str)
- f_gz = gzip.open(filename, 'r')
- tree = self.etree.parse(f_gz)
- self.assertEqual(self.etree.tostring(tree.getroot()), self.root_str)
+ try:
+ f_gz = gzip.open(filename, 'r')
+ tree = self.etree.parse(f_gz)
+ self.assertEqual(self.etree.tostring(tree.getroot()), self.root_str)
+ finally:
+ os.remove(filename)
+
def test_class_parse_filename(self):
# (c)ElementTree class ElementTree has a 'parse' method that returns
@@ -45,26 +52,32 @@
filename = tempfile.mktemp(suffix=".xml")
open(filename, 'wb').write(self.root_str)
- tree = self.etree.ElementTree()
- root = tree.parse(filename)
- self.assertEqual(self.etree.tostring(root), self.root_str)
+ try:
+ tree = self.etree.ElementTree()
+ root = tree.parse(filename)
+ self.assertEqual(self.etree.tostring(root), self.root_str)
+ finally:
+ os.remove(filename)
def test_class_parse_filename_remove_previous(self):
filename = tempfile.mktemp(suffix=".xml")
open(filename, "wb").write(self.root_str)
- tree = self.etree.ElementTree()
- root = tree.parse(filename)
- # and now do it again; previous content should still be there
- root2 = tree.parse(filename)
- self.assertEquals('a', root.tag)
- self.assertEquals('a', root2.tag)
- # now remove all references to root2, and parse again
- del root2
- root3 = tree.parse(filename)
- self.assertEquals('a', root.tag)
- self.assertEquals('a', root3.tag)
- # root2's memory should've been freed here
- # XXX how to check?
+ try:
+ tree = self.etree.ElementTree()
+ root = tree.parse(filename)
+ # and now do it again; previous content should still be there
+ root2 = tree.parse(filename)
+ self.assertEquals('a', root.tag)
+ self.assertEquals('a', root2.tag)
+ # now remove all references to root2, and parse again
+ del root2
+ root3 = tree.parse(filename)
+ self.assertEquals('a', root.tag)
+ self.assertEquals('a', root3.tag)
+ # root2's memory should've been freed here
+ # XXX how to check?
+ finally:
+ os.remove(filename)
def test_class_parse_fileobject(self):
# (c)ElementTree class ElementTree has a 'parse' method that returns
@@ -74,10 +87,13 @@
filename = tempfile.mktemp(suffix=".xml")
open(filename, 'wb').write(self.root_str)
- f = open(filename, 'r')
- tree = self.etree.ElementTree()
- root = tree.parse(f)
- self.assertEqual(self.etree.tostring(root), self.root_str)
+ try:
+ f = open(filename, 'r')
+ tree = self.etree.ElementTree()
+ root = tree.parse(f)
+ self.assertEqual(self.etree.tostring(root), self.root_str)
+ finally:
+ os.remove(filename)
def test_class_parse_unamed_fileobject(self):
# (c)ElementTree class ElementTree has a 'parse' method that returns
From scoder at codespeak.net Mon May 22 08:43:50 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Mon, 22 May 2006 08:43:50 +0200 (CEST)
Subject: [Lxml-checkins] r27578 - lxml/trunk/src/lxml/tests
Message-ID: <20060522064350.60D7010071@code0.codespeak.net>
Author: scoder
Date: Mon May 22 08:43:49 2006
New Revision: 27578
Modified:
lxml/trunk/src/lxml/tests/test_elementtree.py
lxml/trunk/src/lxml/tests/test_htmlparser.py
lxml/trunk/src/lxml/tests/test_io.py
Log:
prevent test cases from leaking temp files, some cleanup in test_elementtree.py
Modified: lxml/trunk/src/lxml/tests/test_elementtree.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_elementtree.py (original)
+++ lxml/trunk/src/lxml/tests/test_elementtree.py Mon May 22 08:43:49 2006
@@ -1676,7 +1676,6 @@
)
def test_encoding(self):
- ElementTree = self.etree.ElementTree
Element = self.etree.Element
a = Element('a')
@@ -1704,7 +1703,7 @@
test_utf = u'S?k p? nettet '
self.assertRaises(SyntaxError, XML, test_utf)
- def test_encoding_default_encoding(self):
+ def test_encoding_write_default_encoding(self):
ElementTree = self.etree.ElementTree
Element = self.etree.Element
@@ -1716,7 +1715,7 @@
tree.write(f)
data = f.getvalue()
self.assertEquals(
- 'Søk på nettet ',
+ u'Søk på nettet '.encode('ASCII', 'xmlcharrefreplace'),
data)
def test_encoding_tostring(self):
@@ -1783,8 +1782,9 @@
# the same, just hex versus decimal
expected = 'Søk på nettet '
- expected2 = 'Søk på nettet '
- self.assert_(tostring(b) in [expected, expected2])
+ self.assertEquals(
+ expected,
+ tostring(b))
def test_deepcopy(self):
Element = self.etree.Element
@@ -1885,15 +1885,17 @@
"""
ElementTree = self.etree.ElementTree
handle, filename = tempfile.mkstemp()
- f = open(filename, 'wb')
- tree = ElementTree(element=element)
- tree.write(f, encoding)
- f.close()
- f = open(filename, 'rb')
- data = unicode(f.read(), encoding)
- f.close()
- os.close(handle)
- os.remove(filename)
+ try:
+ f = open(filename, 'wb')
+ tree = ElementTree(element=element)
+ tree.write(f, encoding)
+ f.close()
+ f = open(filename, 'rb')
+ data = unicode(f.read(), encoding)
+ f.close()
+ finally:
+ os.close(handle)
+ os.remove(filename)
return canonicalize(data)
def assertXML(self, expected, element, encoding='us-ascii'):
Modified: lxml/trunk/src/lxml/tests/test_htmlparser.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_htmlparser.py (original)
+++ lxml/trunk/src/lxml/tests/test_htmlparser.py Mon May 22 08:43:49 2006
@@ -5,7 +5,7 @@
"""
import unittest
-import tempfile
+import tempfile, os
from common_imports import StringIO, etree, fileInTestDir
from common_imports import SillyFileLike, HelperTestCase
@@ -59,9 +59,13 @@
parser = self.etree.HTMLParser()
filename = tempfile.mktemp(suffix=".html")
open(filename, 'wb').write(self.html_str)
- f = open(filename, 'r')
- tree = self.etree.parse(f, parser)
- self.assertEqual(self.etree.tostring(tree.getroot()), self.html_str)
+ try:
+ f = open(filename, 'r')
+ tree = self.etree.parse(f, parser)
+ f.close()
+ self.assertEqual(self.etree.tostring(tree.getroot()), self.html_str)
+ finally:
+ os.remove(filename)
def test_module_parse_html_filelike(self):
parser = self.etree.HTMLParser()
Modified: lxml/trunk/src/lxml/tests/test_io.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_io.py (original)
+++ lxml/trunk/src/lxml/tests/test_io.py Mon May 22 08:43:49 2006
@@ -5,7 +5,7 @@
"""
import unittest
-import tempfile, gzip
+import tempfile, gzip, os
from common_imports import etree, ElementTree, fileInTestDir
from common_imports import SillyFileLike, LargeFileLike
@@ -27,15 +27,22 @@
filename = tempfile.mktemp(suffix=".xml")
self.tree.write(filename)
- self.assertEqual(open(filename).read(), self.root_str)
+ try:
+ self.assertEqual(open(filename).read(), self.root_str)
+ finally:
+ os.remove(filename)
def test_module_parse_gzipobject(self):
# (c)ElementTree supports gzip instance as parse argument
filename = tempfile.mktemp(suffix=".xml.gz")
gzip.open(filename, 'wb').write(self.root_str)
- f_gz = gzip.open(filename, 'r')
- tree = self.etree.parse(f_gz)
- self.assertEqual(self.etree.tostring(tree.getroot()), self.root_str)
+ try:
+ f_gz = gzip.open(filename, 'r')
+ tree = self.etree.parse(f_gz)
+ self.assertEqual(self.etree.tostring(tree.getroot()), self.root_str)
+ finally:
+ os.remove(filename)
+
def test_class_parse_filename(self):
# (c)ElementTree class ElementTree has a 'parse' method that returns
@@ -45,26 +52,32 @@
filename = tempfile.mktemp(suffix=".xml")
open(filename, 'wb').write(self.root_str)
- tree = self.etree.ElementTree()
- root = tree.parse(filename)
- self.assertEqual(self.etree.tostring(root), self.root_str)
+ try:
+ tree = self.etree.ElementTree()
+ root = tree.parse(filename)
+ self.assertEqual(self.etree.tostring(root), self.root_str)
+ finally:
+ os.remove(filename)
def test_class_parse_filename_remove_previous(self):
filename = tempfile.mktemp(suffix=".xml")
open(filename, "wb").write(self.root_str)
- tree = self.etree.ElementTree()
- root = tree.parse(filename)
- # and now do it again; previous content should still be there
- root2 = tree.parse(filename)
- self.assertEquals('a', root.tag)
- self.assertEquals('a', root2.tag)
- # now remove all references to root2, and parse again
- del root2
- root3 = tree.parse(filename)
- self.assertEquals('a', root.tag)
- self.assertEquals('a', root3.tag)
- # root2's memory should've been freed here
- # XXX how to check?
+ try:
+ tree = self.etree.ElementTree()
+ root = tree.parse(filename)
+ # and now do it again; previous content should still be there
+ root2 = tree.parse(filename)
+ self.assertEquals('a', root.tag)
+ self.assertEquals('a', root2.tag)
+ # now remove all references to root2, and parse again
+ del root2
+ root3 = tree.parse(filename)
+ self.assertEquals('a', root.tag)
+ self.assertEquals('a', root3.tag)
+ # root2's memory should've been freed here
+ # XXX how to check?
+ finally:
+ os.remove(filename)
def test_class_parse_fileobject(self):
# (c)ElementTree class ElementTree has a 'parse' method that returns
@@ -74,10 +87,13 @@
filename = tempfile.mktemp(suffix=".xml")
open(filename, 'wb').write(self.root_str)
- f = open(filename, 'r')
- tree = self.etree.ElementTree()
- root = tree.parse(f)
- self.assertEqual(self.etree.tostring(root), self.root_str)
+ try:
+ f = open(filename, 'r')
+ tree = self.etree.ElementTree()
+ root = tree.parse(f)
+ self.assertEqual(self.etree.tostring(root), self.root_str)
+ finally:
+ os.remove(filename)
def test_class_parse_unamed_fileobject(self):
# (c)ElementTree class ElementTree has a 'parse' method that returns
From scoder at codespeak.net Mon May 22 08:54:34 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Mon, 22 May 2006 08:54:34 +0200 (CEST)
Subject: [Lxml-checkins] r27579 - lxml/branch/xmlsave/src/lxml
Message-ID: <20060522065434.4F29C10071@code0.codespeak.net>
Author: scoder
Date: Mon May 22 08:54:33 2006
New Revision: 27579
Modified:
lxml/branch/xmlsave/src/lxml/serializer.pxi
Log:
renamed internal method
Modified: lxml/branch/xmlsave/src/lxml/serializer.pxi
==============================================================================
--- lxml/branch/xmlsave/src/lxml/serializer.pxi (original)
+++ lxml/branch/xmlsave/src/lxml/serializer.pxi Mon May 22 08:54:33 2006
@@ -53,7 +53,7 @@
save_options = save_options | tree.XML_SAVE_NO_DECL
return save_options
- cdef int _saveNode(self, tree.xmlSaveCtxt* save_ctxt,
+ cdef int _saveNodeAndClose(self, tree.xmlSaveCtxt* save_ctxt,
xmlNode* c_node) except -1:
cdef long result
self._error_log.connect()
@@ -261,7 +261,7 @@
else:
raise TypeError, "File or filename expected, got '%s'" % type(f)
- formatter._saveNode(save_ctxt, element._c_node)
+ formatter._saveNodeAndClose(save_ctxt, element._c_node)
if writer is not None:
writer._exc_context._raise_if_stored()
From scoder at codespeak.net Mon May 22 08:56:09 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Mon, 22 May 2006 08:56:09 +0200 (CEST)
Subject: [Lxml-checkins] r27580 - lxml/branch/xmlsave/src/lxml/tests
Message-ID: <20060522065609.0077510071@code0.codespeak.net>
Author: scoder
Date: Mon May 22 08:56:09 2006
New Revision: 27580
Modified:
lxml/branch/xmlsave/src/lxml/tests/common_imports.py
Log:
utility function unhex_entities() in tests/common_imports.py to replace hex entities by their plain integer equivalent
Modified: lxml/branch/xmlsave/src/lxml/tests/common_imports.py
==============================================================================
--- lxml/branch/xmlsave/src/lxml/tests/common_imports.py (original)
+++ lxml/branch/xmlsave/src/lxml/tests/common_imports.py Mon May 22 08:56:09 2006
@@ -86,3 +86,8 @@
for entity_name, value in re.findall("(([0-9]+);)", xml):
xml = xml.replace(entity_name, unichr(int(value)))
return xml
+
+def unhex_entities(xml):
+ for entity_name, value in re.findall("((x[0-9a-fA-F]+);)", xml):
+ xml = xml.replace(entity_name, "%s;" % eval('0'+value))
+ return xml
From scoder at codespeak.net Mon May 22 08:56:53 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Mon, 22 May 2006 08:56:53 +0200 (CEST)
Subject: [Lxml-checkins] r27581 - lxml/branch/xmlsave/src/lxml/tests
Message-ID: <20060522065653.B56B910071@code0.codespeak.net>
Author: scoder
Date: Mon May 22 08:56:52 2006
New Revision: 27581
Modified:
lxml/branch/xmlsave/src/lxml/tests/test_elementtree.py
Log:
use unhex_entities() to compare write() results (fixes one test case)
Modified: lxml/branch/xmlsave/src/lxml/tests/test_elementtree.py
==============================================================================
--- lxml/branch/xmlsave/src/lxml/tests/test_elementtree.py (original)
+++ lxml/branch/xmlsave/src/lxml/tests/test_elementtree.py Mon May 22 08:56:52 2006
@@ -11,7 +11,8 @@
import unittest, doctest
import os, shutil, tempfile, copy
-from common_imports import StringIO, etree, ElementTree, HelperTestCase, fileInTestDir, canonicalize
+from common_imports import StringIO, etree, ElementTree, HelperTestCase, fileInTestDir
+from common_imports import canonicalize, unhex_entities
class ETreeTestCaseBase(unittest.TestCase):
etree = None
@@ -1716,7 +1717,7 @@
data = f.getvalue()
self.assertEquals(
u'S?k p? nettet '.encode('ASCII', 'xmlcharrefreplace'),
- data)
+ unhex_entities(data))
def test_encoding_tostring(self):
Element = self.etree.Element
From scoder at codespeak.net Mon May 22 10:19:08 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Mon, 22 May 2006 10:19:08 +0200 (CEST)
Subject: [Lxml-checkins] r27583 - lxml/trunk/src/lxml
Message-ID: <20060522081908.D790C10071@code0.codespeak.net>
Author: scoder
Date: Mon May 22 10:19:07 2006
New Revision: 27583
Modified:
lxml/trunk/src/lxml/etree.pyx
lxml/trunk/src/lxml/tree.pxd
Log:
use correct API in comment text setting
Modified: lxml/trunk/src/lxml/etree.pyx
==============================================================================
--- lxml/trunk/src/lxml/etree.pyx (original)
+++ lxml/trunk/src/lxml/etree.pyx Mon May 22 10:19:07 2006
@@ -969,20 +969,11 @@
cdef tree.xmlDict* c_dict
cdef char* c_text
if value is None:
- value = ''
+ c_text = NULL
else:
value = _utf8(value)
- c_text = self._c_node.content
- if c_text is not NULL:
- if self._c_node.doc is not NULL:
- c_dict = self._c_node.doc.dict
- else:
- c_dict = NULL
- # this code is copied from libxml2's DICT_FREE
- if c_dict is NULL or \
- tree.xmlDictOwns(c_dict, c_text) == 0:
- tree.xmlFree(c_text)
- self._c_node.content = tree.xmlStrdup(_cstr(value))
+ c_text = _cstr(value)
+ tree.xmlNodeSetContent(self._c_node, c_text)
# ACCESSORS
def __repr__(self):
Modified: lxml/trunk/src/lxml/tree.pxd
==============================================================================
--- lxml/trunk/src/lxml/tree.pxd (original)
+++ lxml/trunk/src/lxml/tree.pxd Mon May 22 10:19:07 2006
@@ -198,6 +198,7 @@
xmlDoc* doc, xmlNode* cur, int level,
int format, char* encoding)
cdef void xmlNodeSetName(xmlNode* cur, char* name)
+ cdef void xmlNodeSetContent(xmlNode* cur, char* content)
cdef xmlDoc* xmlCopyDoc(xmlDoc* doc, int recursive)
cdef xmlNode* xmlCopyNode(xmlNode* node, int extended)
cdef int xmlReconciliateNs(xmlDoc* doc, xmlNode* tree)
From scoder at codespeak.net Mon May 22 10:44:17 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Mon, 22 May 2006 10:44:17 +0200 (CEST)
Subject: [Lxml-checkins] r27584 - lxml/branch/xmlsave/src/lxml/tests
Message-ID: <20060522084417.EB53C10071@code0.codespeak.net>
Author: scoder
Date: Mon May 22 10:44:16 2006
New Revision: 27584
Modified:
lxml/branch/xmlsave/src/lxml/tests/test_elementtree.py
Log:
test XML escaping and latin1 encoding
Modified: lxml/branch/xmlsave/src/lxml/tests/test_elementtree.py
==============================================================================
--- lxml/branch/xmlsave/src/lxml/tests/test_elementtree.py (original)
+++ lxml/branch/xmlsave/src/lxml/tests/test_elementtree.py Mon May 22 10:44:16 2006
@@ -188,6 +188,32 @@
self.assertEquals(None, root.text)
self.assertEquals('One', root[0].text)
+ def test_text_escape_in(self):
+ ElementTree = self.etree.ElementTree
+
+ f = StringIO('This is > than a text ')
+ doc = ElementTree(file=f)
+ root = doc.getroot()
+ self.assertEquals('This is > than a text', root.text)
+
+ def test_text_escape_out(self):
+ ElementTree = self.etree.ElementTree
+ Element = self.etree.Element
+
+ a = Element("a")
+ a.text = "<>&"
+ self.assertXML('<>& ',
+ a)
+
+ def test_text_escape_tostring(self):
+ tostring = self.etree.tostring
+ Element = self.etree.Element
+
+ a = Element("a")
+ a.text = "<>&"
+ self.assertEqual('<>& ',
+ tostring(a))
+
def test_tail(self):
ElementTree = self.etree.ElementTree
@@ -1698,6 +1724,24 @@
self.assertEqual(u'S?k p? nettet '.encode('UTF-8'),
f.getvalue())
+ def test_encoding3(self):
+ ElementTree = self.etree.ElementTree
+ Element = self.etree.Element
+
+ a = Element('a')
+ a.text = u'Søk på nettet'
+
+ f = StringIO()
+ tree = ElementTree(element=a)
+ tree.write(f, 'iso-8859-1')
+ result = f.getvalue()
+ declaration = ""
+ self.assertEqual(result[:len(declaration)],
+ declaration)
+ result = result[len(declaration):].strip()
+ self.assertEqual(u'Søk på nettet '.encode('iso-8859-1'),
+ result)
+
# raise error on wrong (left-over?) encoding declaration in unicode strings
def _test_wrong_unicode_encoding(self):
XML = self.etree.XML
@@ -1781,7 +1825,6 @@
b = SubElement(a, 'b')
b.text = u'S?k p? nettet'
- # the same, just hex versus decimal
expected = 'Søk på nettet '
self.assertEquals(
expected,
From scoder at codespeak.net Mon May 22 11:41:01 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Mon, 22 May 2006 11:41:01 +0200 (CEST)
Subject: [Lxml-checkins] r27585 - lxml/branch/xmlsave/src/lxml/tests
Message-ID: <20060522094101.6E5F610071@code0.codespeak.net>
Author: scoder
Date: Mon May 22 11:41:00 2006
New Revision: 27585
Modified:
lxml/branch/xmlsave/src/lxml/tests/test_sax.py
Log:
compare c14n output in test_sax.py
Modified: lxml/branch/xmlsave/src/lxml/tests/test_sax.py
==============================================================================
--- lxml/branch/xmlsave/src/lxml/tests/test_sax.py (original)
+++ lxml/branch/xmlsave/src/lxml/tests/test_sax.py Mon May 22 11:41:00 2006
@@ -7,7 +7,7 @@
import unittest, doctest
from StringIO import StringIO
-from common_imports import HelperTestCase
+from common_imports import HelperTestCase, canonicalize
from lxml import sax
class ETreeSaxTestCase(HelperTestCase):
@@ -15,7 +15,7 @@
def test_etree_sax_simple(self):
tree = self.parse('ab ba ')
xml_out = self._saxify_serialize(tree)
- self.assertEquals('ab ba ',
+ self.assertEquals('ab ba ',
xml_out)
def test_etree_sax_double(self):
@@ -27,7 +27,7 @@
def test_etree_sax_attributes(self):
tree = self.parse('ab ba ')
xml_out = self._saxify_serialize(tree)
- self.assertEquals('ab ba ',
+ self.assertEquals('ab ba ',
xml_out)
def test_etree_sax_ns1(self):
@@ -54,11 +54,11 @@
b = a[0]
xml_out = self._saxify_serialize(a)
- self.assertEquals(' ',
+ self.assertEquals(' ',
xml_out)
xml_out = self._saxify_serialize(b)
- self.assertEquals(' ',
+ self.assertEquals(' ',
xml_out)
def test_element_sax_ns(self):
@@ -167,7 +167,7 @@
new_tree = self._saxify_unsaxify(tree)
f = StringIO()
new_tree.write(f)
- return f.getvalue()
+ return canonicalize(f.getvalue())
def test_suite():
From scoder at codespeak.net Mon May 22 11:48:03 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Mon, 22 May 2006 11:48:03 +0200 (CEST)
Subject: [Lxml-checkins] r27586 - lxml/branch/xmlsave/src/lxml/tests
Message-ID: <20060522094803.C64D310071@code0.codespeak.net>
Author: scoder
Date: Mon May 22 11:48:02 2006
New Revision: 27586
Modified:
lxml/branch/xmlsave/src/lxml/tests/test_sax.py
Log:
actually, use tostring() for comparison in test_sax.py
Modified: lxml/branch/xmlsave/src/lxml/tests/test_sax.py
==============================================================================
--- lxml/branch/xmlsave/src/lxml/tests/test_sax.py (original)
+++ lxml/branch/xmlsave/src/lxml/tests/test_sax.py Mon May 22 11:48:02 2006
@@ -7,15 +7,15 @@
import unittest, doctest
from StringIO import StringIO
-from common_imports import HelperTestCase, canonicalize
-from lxml import sax
+from common_imports import HelperTestCase
+from lxml import etree, sax
class ETreeSaxTestCase(HelperTestCase):
def test_etree_sax_simple(self):
tree = self.parse('ab ba ')
xml_out = self._saxify_serialize(tree)
- self.assertEquals('ab ba ',
+ self.assertEquals('ab ba ',
xml_out)
def test_etree_sax_double(self):
@@ -27,7 +27,7 @@
def test_etree_sax_attributes(self):
tree = self.parse('ab ba ')
xml_out = self._saxify_serialize(tree)
- self.assertEquals('ab ba ',
+ self.assertEquals('ab ba ',
xml_out)
def test_etree_sax_ns1(self):
@@ -54,11 +54,11 @@
b = a[0]
xml_out = self._saxify_serialize(a)
- self.assertEquals(' ',
+ self.assertEquals(' ',
xml_out)
xml_out = self._saxify_serialize(b)
- self.assertEquals(' ',
+ self.assertEquals(' ',
xml_out)
def test_element_sax_ns(self):
@@ -165,9 +165,7 @@
def _saxify_serialize(self, tree):
new_tree = self._saxify_unsaxify(tree)
- f = StringIO()
- new_tree.write(f)
- return canonicalize(f.getvalue())
+ return etree.tostring(new_tree.getroot())
def test_suite():
From scoder at codespeak.net Mon May 22 11:50:38 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Mon, 22 May 2006 11:50:38 +0200 (CEST)
Subject: [Lxml-checkins] r27587 - lxml/branch/xmlsave/src/lxml
Message-ID: <20060522095038.768EC10071@code0.codespeak.net>
Author: scoder
Date: Mon May 22 11:50:37 2006
New Revision: 27587
Modified:
lxml/branch/xmlsave/src/lxml/etree.pyx
Log:
check for NULL results
Modified: lxml/branch/xmlsave/src/lxml/etree.pyx
==============================================================================
--- lxml/branch/xmlsave/src/lxml/etree.pyx (original)
+++ lxml/branch/xmlsave/src/lxml/etree.pyx Mon May 22 11:50:37 2006
@@ -702,6 +702,8 @@
text = _utf8(value)
c_text_node = tree.xmlNewDocText(self._doc._c_doc,
_cstr(text))
+ if c_text_node is NULL:
+ raise LxmlError, "Error creating text node"
if self._c_node.children is NULL:
tree.xmlAddChild(self._c_node, c_text_node)
else:
@@ -720,6 +722,8 @@
return
text = _utf8(value)
c_text_node = tree.xmlNewDocText(self._doc._c_doc, _cstr(text))
+ if c_text_node is NULL:
+ raise LxmlError, "Error creating text node"
# XXX what if we're the top element?
tree.xmlAddNextSibling(self._c_node, c_text_node)
From scoder at codespeak.net Mon May 22 11:54:13 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Mon, 22 May 2006 11:54:13 +0200 (CEST)
Subject: [Lxml-checkins] r27588 - lxml/branch/xmlsave/src/lxml
Message-ID: <20060522095413.8B2DA10071@code0.codespeak.net>
Author: scoder
Date: Mon May 22 11:54:12 2006
New Revision: 27588
Modified:
lxml/branch/xmlsave/src/lxml/serializer.pxi
lxml/branch/xmlsave/src/lxml/tree.pxd
Log:
use xmlDocSave in XMLFormatter._saveNodeAndClose() to make it write the XML declaration, prevent escaping characters (libxml2 bug) except on escape_characters keyword
Modified: lxml/branch/xmlsave/src/lxml/serializer.pxi
==============================================================================
--- lxml/branch/xmlsave/src/lxml/serializer.pxi (original)
+++ lxml/branch/xmlsave/src/lxml/serializer.pxi Mon May 22 11:54:12 2006
@@ -8,11 +8,12 @@
cdef class XMLFormatter:
cdef int _pretty_print
cdef int _write_declaration
+ cdef int _escape_characters
cdef int _save_options
cdef _ErrorLog _error_log
def __init__(self, pretty_print=False, write_declaration=None,
- split_empty_tags=False):
+ split_empty_tags=False, escape_characters=None):
cdef int save_options
save_options = tree.XML_SAVE_NO_XHTML
@@ -33,6 +34,11 @@
self._write_declaration = False
save_options = save_options | tree.XML_SAVE_NO_DECL
+ if escape_characters is None:
+ self._escape_characters = -1
+ else:
+ self._escape_characters = bool(escape_characters)
+
self._save_options = save_options
self._error_log = _ErrorLog()
@@ -40,6 +46,16 @@
def __get__(self):
return self._error_log.copy()
+ cdef _raiseError(self):
+ error = self._error_log.last_error
+ if error is not None:
+ if error.domain == xmlerror.XML_FROM_IO:
+ raise IOError, error.message
+ else:
+ raise XMLOutputError, error.message
+ else:
+ raise XMLOutputError, "Error serializing the tree"
+
cdef int _optionsForEncoding(self, encoding):
"""Purely for ElementTree compatibility: suppress XML declaration in
default cases."""
@@ -49,31 +65,41 @@
save_options = self._save_options
if encoding is None:
save_options = save_options | tree.XML_SAVE_NO_DECL
- elif encoding.upper() in ('US-ASCII', 'ASCII', 'UTF8', 'UTF-8'):
+ elif encoding in ('US-ASCII', 'ASCII', 'UTF8', 'UTF-8'):
save_options = save_options | tree.XML_SAVE_NO_DECL
return save_options
- cdef int _saveNodeAndClose(self, tree.xmlSaveCtxt* save_ctxt,
- xmlNode* c_node) except -1:
+ cdef void _setupCharacterEscaping(self, tree.xmlSaveCtxt* save_ctxt,
+ encoding):
+ """libxml2 defaults to escaping every non-ascii character whatever the
+ encoding, but we only want that for ASCII encoding."""
+ if self._escape_characters == -1:
+ if encoding is not None and encoding not in ('US-ASCII', 'ASCII'):
+ tree.xmlSaveSetEscape(save_ctxt, NULL)
+
+ cdef _saveNodeAndClose(self, tree.xmlSaveCtxt* save_ctxt,
+ xmlNode* c_node):
cdef long result
+ cdef xmlDoc* c_doc
+ cdef xmlDoc* c_root_doc
self._error_log.connect()
- result = tree.xmlSaveTree(save_ctxt, c_node)
+ if self._escape_characters == 0:
+ tree.xmlSaveSetEscape(save_ctxt, NULL)
+
+ c_doc = c_node.doc
+ c_root_doc = _fakeRootDoc(c_doc, c_node)
+ result = tree.xmlSaveDoc(save_ctxt, c_root_doc)
tree.xmlSaveClose(save_ctxt)
+ _destroyFakeDoc(c_doc, c_root_doc)
+
self._error_log.disconnect()
if result < 0:
- error = self._error_log.last_error
- if error is not None:
- if error.domain == xmlerror.XML_FROM_IO:
- raise IOError, error.message
- else:
- raise XMLOutputError, error.message
- else:
- raise XMLOutputError, "Error serializing the tree"
- else:
- return 0
+ self._raiseError()
cdef class XHTMLFormatter(XMLFormatter):
def __init__(self, **kwargs):
+ if 'escape_entities' not in kwargs:
+ kwargs['escape_entities'] = True
XMLFormatter.__init__(self, **kwargs)
self._save_options = self._save_options & (~tree.XML_SAVE_NO_XHTML)
@@ -261,6 +287,7 @@
else:
raise TypeError, "File or filename expected, got '%s'" % type(f)
+ formatter._setupCharacterEscaping(save_ctxt, encoding)
formatter._saveNodeAndClose(save_ctxt, element._c_node)
if writer is not None:
writer._exc_context._raise_if_stored()
Modified: lxml/branch/xmlsave/src/lxml/tree.pxd
==============================================================================
--- lxml/branch/xmlsave/src/lxml/tree.pxd (original)
+++ lxml/branch/xmlsave/src/lxml/tree.pxd Mon May 22 11:54:12 2006
@@ -248,6 +248,7 @@
cdef long xmlSaveDoc(xmlSaveCtxt* ctxt, xmlDoc* doc)
cdef long xmlSaveTree(xmlSaveCtxt* ctxt, xmlNode* node)
cdef int xmlSaveClose(xmlSaveCtxt* ctxt)
+ cdef int xmlSaveSetEscape(xmlSaveCtxt* ctxt, void* escape_function)
cdef extern from "libxml/xmlstring.h":
cdef char* xmlStrdup(char* cur)
From scoder at codespeak.net Mon May 22 11:55:36 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Mon, 22 May 2006 11:55:36 +0200 (CEST)
Subject: [Lxml-checkins] r27589 - lxml/branch/xmlsave/src/lxml/tests
Message-ID: <20060522095536.A2A6110071@code0.codespeak.net>
Author: scoder
Date: Mon May 22 11:55:35 2006
New Revision: 27589
Modified:
lxml/branch/xmlsave/src/lxml/tests/test_elementtree.py
lxml/branch/xmlsave/src/lxml/tests/test_io.py
Log:
cleanup in test cases, ignore acceptable incompatibilities between ET and etree in terms of whitespace
Modified: lxml/branch/xmlsave/src/lxml/tests/test_elementtree.py
==============================================================================
--- lxml/branch/xmlsave/src/lxml/tests/test_elementtree.py (original)
+++ lxml/branch/xmlsave/src/lxml/tests/test_elementtree.py Mon May 22 11:55:35 2006
@@ -9,7 +9,7 @@
"""
import unittest, doctest
-import os, shutil, tempfile, copy
+import os, re, shutil, tempfile, copy
from common_imports import StringIO, etree, ElementTree, HelperTestCase, fileInTestDir
from common_imports import canonicalize, unhex_entities
@@ -211,7 +211,7 @@
a = Element("a")
a.text = "<>&"
- self.assertEqual('<>& ',
+ self.assertEquals('<>& ',
tostring(a))
def test_tail(self):
@@ -733,7 +733,7 @@
a = Element('a')
a.append(Comment('foo'))
- self.assertEqual(a[0].text, 'foo')
+ self.assertEquals(a[0].text, 'foo')
def test_comment_text(self):
Element = self.etree.Element
@@ -742,10 +742,10 @@
a = Element('a')
a.append(Comment('foo'))
- self.assertEqual(a[0].text, 'foo')
+ self.assertEquals(a[0].text, 'foo')
a[0].text = "TEST"
- self.assertEqual(a[0].text, 'TEST')
+ self.assertEquals(a[0].text, 'TEST')
def test_comment_whitespace(self):
Element = self.etree.Element
@@ -754,7 +754,7 @@
a = Element('a')
a.append(Comment(' foo '))
- self.assertEqual(a[0].text, ' foo ')
+ self.assertEquals(a[0].text, ' foo ')
def test_comment_nonsense(self):
Comment = self.etree.Comment
@@ -1711,7 +1711,7 @@
u'Søk på nettet '.encode('UTF-8'),
a, 'utf-8')
- def test_encoding2(self):
+ def test_encoding_exact(self):
ElementTree = self.etree.ElementTree
Element = self.etree.Element
@@ -1721,26 +1721,24 @@
f = StringIO()
tree = ElementTree(element=a)
tree.write(f, 'utf-8')
- self.assertEqual(u'Søk på nettet '.encode('UTF-8'),
- f.getvalue())
+ self.assertEquals(u'Søk på nettet '.encode('UTF-8'),
+ f.getvalue().strip())
- def test_encoding3(self):
+ def test_encoding_latin1(self):
ElementTree = self.etree.ElementTree
Element = self.etree.Element
a = Element('a')
a.text = u'Søk på nettet'
-
+
f = StringIO()
tree = ElementTree(element=a)
tree.write(f, 'iso-8859-1')
result = f.getvalue()
declaration = ""
- self.assertEqual(result[:len(declaration)],
- declaration)
- result = result[len(declaration):].strip()
- self.assertEqual(u'Søk på nettet '.encode('iso-8859-1'),
- result)
+ self.assertEncodingDeclaration(result,'iso-8859-1')
+ self.assertEquals(u'Søk på nettet '.encode('iso-8859-1'),
+ result.split('?>', 1)[-1].strip())
# raise error on wrong (left-over?) encoding declaration in unicode strings
def _test_wrong_unicode_encoding(self):
@@ -1761,7 +1759,7 @@
data = f.getvalue()
self.assertEquals(
u'Søk på nettet '.encode('ASCII', 'xmlcharrefreplace'),
- unhex_entities(data))
+ unhex_entities(data.strip()))
def test_encoding_tostring(self):
Element = self.etree.Element
@@ -1769,8 +1767,8 @@
a = Element('a')
a.text = u'Søk på nettet'
- self.assertEqual(u'Søk på nettet '.encode('UTF-8'),
- tostring(a, 'utf-8'))
+ self.assertEquals(u'Søk på nettet '.encode('UTF-8'),
+ tostring(a, 'utf-8'))
def test_encoding_tostring_unknown(self):
Element = self.etree.Element
@@ -1788,8 +1786,8 @@
a = Element('a')
b = SubElement(a, 'b')
b.text = u'Søk på nettet'
- self.assertEqual(u'Søk på nettet '.encode('UTF-8'),
- tostring(b, 'utf-8'))
+ self.assertEquals(u'Søk på nettet '.encode('UTF-8'),
+ tostring(b, 'utf-8'))
def test_encoding_tostring_sub_tail(self):
Element = self.etree.Element
@@ -1800,8 +1798,8 @@
b = SubElement(a, 'b')
b.text = u'Søk på nettet'
b.tail = u'Søk'
- self.assertEqual(u'Søk på nettet Søk'.encode('UTF-8'),
- tostring(b, 'utf-8'))
+ self.assertEquals(u'Søk på nettet Søk'.encode('UTF-8'),
+ tostring(b, 'utf-8'))
def test_encoding_tostring_default_encoding(self):
Element = self.etree.Element
@@ -1949,9 +1947,17 @@
"""
self.assertEquals(expected, self._writeElement(element, encoding))
self.assertEquals(expected, self._writeElementFile(element, encoding))
-
+
+ def assertEncodingDeclaration(self, result, encoding):
+ "Checks if the result XML byte string specifies the encoding."
+ has_encoding = re.compile(r"<\?xml[^>]+ encoding=[\"']([^\"']+)[\"']").match
+ self.assert_(has_encoding(result))
+ result_encoding = has_encoding(result).group(1)
+ self.assertEqual(result_encoding.upper(), encoding.upper())
+
def _rootstring(self, tree):
- return self.etree.tostring(tree.getroot()).replace(' ', '').replace('\n', '')
+ return self.etree.tostring(
+ tree.getroot()).replace(' ', '').replace('\n', '')
def _check_element_tree(self, tree):
self._check_element(tree.getroot())
Modified: lxml/branch/xmlsave/src/lxml/tests/test_io.py
==============================================================================
--- lxml/branch/xmlsave/src/lxml/tests/test_io.py (original)
+++ lxml/branch/xmlsave/src/lxml/tests/test_io.py Mon May 22 11:55:35 2006
@@ -28,7 +28,7 @@
filename = tempfile.mktemp(suffix=".xml")
self.tree.write(filename)
try:
- self.assertEqual(open(filename).read(), self.root_str)
+ self.assertEqual(open(filename).read().strip(), self.root_str)
finally:
os.remove(filename)
From scoder at codespeak.net Mon May 22 12:06:43 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Mon, 22 May 2006 12:06:43 +0200 (CEST)
Subject: [Lxml-checkins] r27590 - lxml/branch/xmlsave/src/lxml
Message-ID: <20060522100643.8C8C510071@code0.codespeak.net>
Author: scoder
Date: Mon May 22 12:06:42 2006
New Revision: 27590
Modified:
lxml/branch/xmlsave/src/lxml/serializer.pxi
Log:
cleanup
Modified: lxml/branch/xmlsave/src/lxml/serializer.pxi
==============================================================================
--- lxml/branch/xmlsave/src/lxml/serializer.pxi (original)
+++ lxml/branch/xmlsave/src/lxml/serializer.pxi Mon May 22 12:06:42 2006
@@ -71,29 +71,42 @@
cdef void _setupCharacterEscaping(self, tree.xmlSaveCtxt* save_ctxt,
encoding):
- """libxml2 defaults to escaping every non-ascii character whatever the
- encoding, but we only want that for ASCII encoding."""
+ """Work-around for libxml2 bug: it defaults to escaping every
+ non-ascii character whatever the encoding, but we only want that for
+ ASCII encoding."""
if self._escape_characters == -1:
if encoding is not None and encoding not in ('US-ASCII', 'ASCII'):
tree.xmlSaveSetEscape(save_ctxt, NULL)
- cdef _saveNodeAndClose(self, tree.xmlSaveCtxt* save_ctxt,
- xmlNode* c_node):
- cdef long result
+ cdef _saveDocAndClose(self, tree.xmlSaveCtxt* save_ctxt,
+ xmlNode* c_root_node):
+ cdef long bytes_written
cdef xmlDoc* c_doc
cdef xmlDoc* c_root_doc
self._error_log.connect()
if self._escape_characters == 0:
tree.xmlSaveSetEscape(save_ctxt, NULL)
- c_doc = c_node.doc
- c_root_doc = _fakeRootDoc(c_doc, c_node)
- result = tree.xmlSaveDoc(save_ctxt, c_root_doc)
+ c_doc = c_root_node.doc
+ c_root_doc = _fakeRootDoc(c_doc, c_root_node)
+ bytes_written = tree.xmlSaveDoc(save_ctxt, c_root_doc)
tree.xmlSaveClose(save_ctxt)
_destroyFakeDoc(c_doc, c_root_doc)
self._error_log.disconnect()
- if result < 0:
+ if bytes_written < 0:
+ self._raiseError()
+
+ cdef _saveNodeAndClose(self, tree.xmlSaveCtxt* save_ctxt,
+ xmlNode* c_node):
+ cdef long bytes_written
+ self._error_log.connect()
+ if self._escape_characters == 0:
+ tree.xmlSaveSetEscape(save_ctxt, NULL)
+ bytes_written = tree.xmlSaveTree(save_ctxt, c_node)
+ tree.xmlSaveClose(save_ctxt)
+ self._error_log.disconnect()
+ if bytes_written < 0:
self._raiseError()
cdef class XHTMLFormatter(XMLFormatter):
@@ -129,7 +142,7 @@
c_buffer = tree.xmlAllocOutputBuffer(enchandler)
if c_buffer is NULL:
tree.xmlCharEncCloseFunc(enchandler)
- raise LxmlError, "Failed to create output buffer"
+ raise LxmlOutputError, "Failed to create output buffer"
try:
_writeNodeToBuffer(c_buffer, element._c_node, c_enc,
@@ -155,7 +168,7 @@
return None
c_buffer = tree.xmlAllocOutputBuffer(NULL)
if c_buffer is NULL:
- raise LxmlError, "Failed to create output buffer"
+ raise LxmlOutputError, "Failed to create output buffer"
try:
_writeNodeToBuffer(c_buffer, element._c_node, NULL, 0, pretty_print)
tree.xmlOutputBufferFlush(c_buffer)
@@ -288,7 +301,7 @@
raise TypeError, "File or filename expected, got '%s'" % type(f)
formatter._setupCharacterEscaping(save_ctxt, encoding)
- formatter._saveNodeAndClose(save_ctxt, element._c_node)
+ formatter._saveDocAndClose(save_ctxt, element._c_node)
if writer is not None:
writer._exc_context._raise_if_stored()
From scoder at codespeak.net Mon May 22 12:31:39 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Mon, 22 May 2006 12:31:39 +0200 (CEST)
Subject: [Lxml-checkins] r27592 - in lxml/trunk: . doc src/lxml
Message-ID: <20060522103139.D512210071@code0.codespeak.net>
Author: scoder
Date: Mon May 22 12:31:38 2006
New Revision: 27592
Modified:
lxml/trunk/CHANGES.txt
lxml/trunk/doc/api.txt
lxml/trunk/src/lxml/etree.pyx
lxml/trunk/src/lxml/tree.pxd
Log:
getpath() method on Element to return a structural XPath expression for the element
Modified: lxml/trunk/CHANGES.txt
==============================================================================
--- lxml/trunk/CHANGES.txt (original)
+++ lxml/trunk/CHANGES.txt Mon May 22 12:31:38 2006
@@ -7,6 +7,9 @@
Features added
--------------
+* Element.getpath() returns an XPath expression to find the node in the tree
+ structure
+
* Error logs now have a ``last_error`` attribute for convenience
* Comment texts can now be changed through the API
Modified: lxml/trunk/doc/api.txt
==============================================================================
--- lxml/trunk/doc/api.txt (original)
+++ lxml/trunk/doc/api.txt Mon May 22 12:31:38 2006
@@ -244,6 +244,19 @@
>>> r[0].text
'Text'
+A related convenience method of Elements is ``getpath()``, which returns a
+structural XPath expression for the respective element::
+
+ >>> a = etree.Element("a")
+ >>> b = etree.SubElement(a, "b")
+ >>> c = etree.SubElement(a, "c")
+ >>> d1 = etree.SubElement(c, "d")
+ >>> d2 = etree.SubElement(c, "d")
+ >>> print d2.getpath()
+ /a/c/d[2]
+ >>> a.xpath(d2.getpath()) == [d2]
+ True
+
XSLT
----
Modified: lxml/trunk/src/lxml/etree.pyx
==============================================================================
--- lxml/trunk/src/lxml/etree.pyx (original)
+++ lxml/trunk/src/lxml/etree.pyx Mon May 22 12:31:38 2006
@@ -787,6 +787,10 @@
return ElementChildIterator(self, reversed=True)
def index(self, _Element x not None, start=None, stop=None):
+ """Find the position of the child within the parent.
+
+ This method is not part of the original ElementTree API.
+ """
cdef Py_ssize_t k, l
cdef Py_ssize_t c_start, c_stop
cdef xmlNode* c_child
@@ -885,6 +889,15 @@
return _elementFactory(self._doc, c_node)
return None
+ def getpath(self):
+ cdef char* c_path
+ c_path = tree.xmlGetNodePath(self._c_node)
+ if c_path is NULL:
+ raise LxmlError, "Cannot create node path."
+ path = c_path
+ tree.xmlFree(c_path)
+ return path
+
def getiterator(self, tag=None):
return ElementDepthFirstIterator(self, tag)
Modified: lxml/trunk/src/lxml/tree.pxd
==============================================================================
--- lxml/trunk/src/lxml/tree.pxd (original)
+++ lxml/trunk/src/lxml/tree.pxd Mon May 22 12:31:38 2006
@@ -177,6 +177,7 @@
cdef void xmlSetProp(xmlNode* node, char* name, char* value)
cdef void xmlSetNsProp(xmlNode* node, xmlNs* ns, char* name, char* value)
cdef void xmlRemoveProp(xmlAttr* cur)
+ cdef char* xmlGetNodePath(xmlNode* node)
cdef void xmlDocDumpMemory(xmlDoc* cur, char** mem, int* size)
cdef void xmlDocDumpMemoryEnc(xmlDoc* cur, char** mem, int* size,
char* encoding)
From scoder at codespeak.net Mon May 22 12:33:00 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Mon, 22 May 2006 12:33:00 +0200 (CEST)
Subject: [Lxml-checkins] r27593 - lxml/trunk/src/lxml
Message-ID: <20060522103300.165F210071@code0.codespeak.net>
Author: scoder
Date: Mon May 22 12:32:59 2006
New Revision: 27593
Modified:
lxml/trunk/src/lxml/etree.pyx
Log:
cleanup
Modified: lxml/trunk/src/lxml/etree.pyx
==============================================================================
--- lxml/trunk/src/lxml/etree.pyx (original)
+++ lxml/trunk/src/lxml/etree.pyx Mon May 22 12:32:59 2006
@@ -893,7 +893,7 @@
cdef char* c_path
c_path = tree.xmlGetNodePath(self._c_node)
if c_path is NULL:
- raise LxmlError, "Cannot create node path."
+ raise LxmlError, "Error creating node path."
path = c_path
tree.xmlFree(c_path)
return path
From scoder at codespeak.net Mon May 22 14:24:57 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Mon, 22 May 2006 14:24:57 +0200 (CEST)
Subject: [Lxml-checkins] r27595 - lxml/branch/xmlsave/src/lxml
Message-ID: <20060522122457.7572A10071@code0.codespeak.net>
Author: scoder
Date: Mon May 22 14:24:56 2006
New Revision: 27595
Modified:
lxml/branch/xmlsave/src/lxml/etree.pyx
lxml/branch/xmlsave/src/lxml/serializer.pxi
lxml/branch/xmlsave/src/lxml/tree.pxd
Log:
support writing .tail to files, cleanup
Modified: lxml/branch/xmlsave/src/lxml/etree.pyx
==============================================================================
--- lxml/branch/xmlsave/src/lxml/etree.pyx (original)
+++ lxml/branch/xmlsave/src/lxml/etree.pyx Mon May 22 14:24:56 2006
@@ -367,15 +367,7 @@
Defaults to ASCII encoding.
"""
self._assertHasRoot()
- # suppress decl. in default case (purely for ElementTree compatibility)
- if encoding is None:
- encoding = 'ASCII'
- write_declaration = 0
- else:
- encoding = encoding.upper()
- write_declaration = encoding not in \
- ('US-ASCII', 'ASCII', 'UTF8', 'UTF-8')
- _tofilelike(file, self._context_node, encoding, formatter)
+ _tofilelike(file, self._context_node, 0, encoding, formatter)
def getiterator(self, tag=None):
root = self.getroot()
Modified: lxml/branch/xmlsave/src/lxml/serializer.pxi
==============================================================================
--- lxml/branch/xmlsave/src/lxml/serializer.pxi (original)
+++ lxml/branch/xmlsave/src/lxml/serializer.pxi Mon May 22 14:24:56 2006
@@ -78,9 +78,10 @@
if encoding is not None and encoding not in ('US-ASCII', 'ASCII'):
tree.xmlSaveSetEscape(save_ctxt, NULL)
- cdef _saveDocAndClose(self, tree.xmlSaveCtxt* save_ctxt,
- xmlNode* c_root_node):
+ cdef _saveDocNodeAndClose(self, tree.xmlSaveCtxt* save_ctxt,
+ xmlNode* c_root_node, int add_tail):
cdef long bytes_written
+ cdef xmlNode* c_node
cdef xmlDoc* c_doc
cdef xmlDoc* c_root_doc
self._error_log.connect()
@@ -90,21 +91,35 @@
c_doc = c_root_node.doc
c_root_doc = _fakeRootDoc(c_doc, c_root_node)
bytes_written = tree.xmlSaveDoc(save_ctxt, c_root_doc)
- tree.xmlSaveClose(save_ctxt)
_destroyFakeDoc(c_doc, c_root_doc)
+ if add_tail:
+ c_node = c_root_node.next
+ while bytes_written >= 0 and c_node is not NULL and \
+ c_node.type == tree.XML_TEXT_NODE:
+ bytes_written = tree.xmlSaveTree(save_ctxt, c_node)
+ c_node = c_node.next
+ tree.xmlSaveClose(save_ctxt)
self._error_log.disconnect()
if bytes_written < 0:
self._raiseError()
cdef _saveNodeAndClose(self, tree.xmlSaveCtxt* save_ctxt,
- xmlNode* c_node):
+ xmlNode* c_node, int add_tail):
cdef long bytes_written
self._error_log.connect()
if self._escape_characters == 0:
tree.xmlSaveSetEscape(save_ctxt, NULL)
+
bytes_written = tree.xmlSaveTree(save_ctxt, c_node)
+ if add_tail:
+ c_node = c_node.next
+ while bytes_written >= 0 and c_node is not NULL and \
+ c_node.type == tree.XML_TEXT_NODE:
+ bytes_written = tree.xmlSaveTree(save_ctxt, c_node)
+ c_node = c_node.next
tree.xmlSaveClose(save_ctxt)
+
self._error_log.disconnect()
if bytes_written < 0:
self._raiseError()
@@ -126,7 +141,6 @@
cdef tree.xmlBuffer* c_result_buffer
cdef tree.xmlCharEncodingHandler* enchandler
cdef char* c_enc
- cdef char* c_version
if element is None:
return None
if encoding is None:
@@ -276,7 +290,8 @@
cdef int _closeFilelikeWriter(void* ctxt):
return (<_FilelikeWriter>ctxt).close()
-cdef _tofilelike(f, _NodeBase element, encoding, XMLFormatter formatter):
+cdef _tofilelike(f, _NodeBase element, int add_tail,
+ encoding, XMLFormatter formatter):
cdef _FilelikeWriter writer
cdef tree.xmlSaveCtxt* save_ctxt
cdef char* c_enc
@@ -294,6 +309,8 @@
filename = _utf8(f)
save_ctxt = tree.xmlSaveToFilename(
_cstr(filename), c_enc, save_options)
+ if save_ctxt is NULL:
+ raise IOError, "Failed to create I/O writer context"
elif hasattr(f, 'write'):
writer = _FilelikeWriter(f)
save_ctxt = writer._createSaveContext(c_enc, save_options)
@@ -301,7 +318,7 @@
raise TypeError, "File or filename expected, got '%s'" % type(f)
formatter._setupCharacterEscaping(save_ctxt, encoding)
- formatter._saveDocAndClose(save_ctxt, element._c_node)
+ formatter._saveDocNodeAndClose(save_ctxt, element._c_node, add_tail)
if writer is not None:
writer._exc_context._raise_if_stored()
Modified: lxml/branch/xmlsave/src/lxml/tree.pxd
==============================================================================
--- lxml/branch/xmlsave/src/lxml/tree.pxd (original)
+++ lxml/branch/xmlsave/src/lxml/tree.pxd Mon May 22 14:24:56 2006
@@ -202,6 +202,7 @@
cdef xmlNode* xmlCopyNode(xmlNode* node, int extended)
cdef int xmlReconciliateNs(xmlDoc* doc, xmlNode* tree)
cdef xmlBuffer* xmlBufferCreate()
+ cdef void xmlBufferFree(xmlBuffer* buf)
cdef char* xmlBufferContent(xmlBuffer* buf)
cdef int xmlBufferLength(xmlBuffer* buf)
cdef int xmlKeepBlanksDefault(int val)
From scoder at codespeak.net Mon May 22 15:12:15 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Mon, 22 May 2006 15:12:15 +0200 (CEST)
Subject: [Lxml-checkins] r27596 - lxml/branch/xmlsave/src/lxml
Message-ID: <20060522131215.8833610071@code0.codespeak.net>
Author: scoder
Date: Mon May 22 15:12:14 2006
New Revision: 27596
Modified:
lxml/branch/xmlsave/src/lxml/etree.pyx
lxml/branch/xmlsave/src/lxml/serializer.pxi
lxml/branch/xmlsave/src/lxml/xmlerror.pxi
Log:
some cleanup, fixed threading issues with global libxml2 settings, renamed XMLFormatter to XMLSerializer and reverted write() back to 'xml_declaration' keyword instead of 'formatter' to make XMLSerializer a separate API
Modified: lxml/branch/xmlsave/src/lxml/etree.pyx
==============================================================================
--- lxml/branch/xmlsave/src/lxml/etree.pyx (original)
+++ lxml/branch/xmlsave/src/lxml/etree.pyx Mon May 22 15:12:14 2006
@@ -33,6 +33,11 @@
# make the compiled-in debug state publicly available
DEBUG = __DEBUG
+def initThread():
+ "Call this method to set up the library from within a new thread."
+ _initThreadLogging()
+ tree.xmlKeepBlanksDefault(0)
+
# Error superclass for ElementTree compatibility
class Error(Exception):
pass
@@ -361,13 +366,17 @@
def __get__(self):
return DocInfo(self._doc)
- def write(self, file, encoding=None, formatter=None):
+ def write(self, file, encoding=None, xml_declaration=None):
"""Write the tree to a file or file-like object.
Defaults to ASCII encoding.
"""
self._assertHasRoot()
- _tofilelike(file, self._context_node, 0, encoding, formatter)
+ if xml_declaration is None:
+ serializer = None
+ else:
+ serializer = XMLSerializer(write_declaration=xml_declaration)
+ _tofilelike(file, self._context_node, 0, encoding, serializer)
def getiterator(self, tag=None):
root = self.getroot()
@@ -1400,7 +1409,7 @@
Defaults to ASCII encoding without XML declaration.
"""
- cdef int write_declaration
+ cdef int c_write_declaration
cdef int c_pretty_print
if encoding is None:
encoding = 'ASCII'
@@ -1409,17 +1418,17 @@
c_pretty_print = bool(pretty_print)
if xml_declaration is None:
# by default, write an XML declaration only for non-standard encodings
- write_declaration = encoding not in \
+ c_write_declaration = encoding not in \
('ASCII', 'UTF-8', 'UTF8', 'US-ASCII')
else:
- write_declaration = bool(xml_declaration)
+ c_write_declaration = bool(xml_declaration)
if isinstance(element_or_tree, _NodeBase):
return _tostring(<_NodeBase>element_or_tree,
- encoding, write_declaration, c_pretty_print)
+ encoding, c_write_declaration, c_pretty_print)
elif isinstance(element_or_tree, _ElementTree):
return _tostring((<_ElementTree>element_or_tree)._context_node,
- encoding, write_declaration, c_pretty_print)
+ encoding, c_write_declaration, c_pretty_print)
else:
raise TypeError, "Type '%s' cannot be serialized." % type(element_or_tree)
@@ -1498,3 +1507,6 @@
include "relaxng.pxi" # RelaxNG
include "xmlschema.pxi" # XMLSchema
+
+# configure main thread
+initThread()
Modified: lxml/branch/xmlsave/src/lxml/serializer.pxi
==============================================================================
--- lxml/branch/xmlsave/src/lxml/serializer.pxi (original)
+++ lxml/branch/xmlsave/src/lxml/serializer.pxi Mon May 22 15:12:14 2006
@@ -1,11 +1,9 @@
# XML serialization and output functions
-tree.xmlKeepBlanksDefault(0)
-
class XMLOutputError(LxmlError):
pass
-cdef class XMLFormatter:
+cdef class XMLSerializer:
cdef int _pretty_print
cdef int _write_declaration
cdef int _escape_characters
@@ -124,15 +122,15 @@
if bytes_written < 0:
self._raiseError()
-cdef class XHTMLFormatter(XMLFormatter):
+cdef class XHTMLSerializer(XMLSerializer):
def __init__(self, **kwargs):
if 'escape_entities' not in kwargs:
kwargs['escape_entities'] = True
- XMLFormatter.__init__(self, **kwargs)
+ XMLSerializer.__init__(self, **kwargs)
self._save_options = self._save_options & (~tree.XML_SAVE_NO_XHTML)
-cdef XMLFormatter __DEFAULT_XML_FORMATTER
-__DEFAULT_XML_FORMATTER = XMLFormatter()
+cdef XMLSerializer __DEFAULT_XML_SERIALIZER
+__DEFAULT_XML_SERIALIZER = XMLSerializer()
cdef _tostring(_NodeBase element, encoding,
int write_xml_declaration, int pretty_print):
@@ -290,8 +288,8 @@
cdef int _closeFilelikeWriter(void* ctxt):
return (<_FilelikeWriter>ctxt).close()
-cdef _tofilelike(f, _NodeBase element, int add_tail,
- encoding, XMLFormatter formatter):
+cdef _tofilelike(f, _NodeBase element, int add_tail, encoding,
+ XMLSerializer serializer):
cdef _FilelikeWriter writer
cdef tree.xmlSaveCtxt* save_ctxt
cdef char* c_enc
@@ -301,9 +299,9 @@
else:
encoding = encoding.upper()
c_enc = encoding
- if formatter is None:
- formatter = __DEFAULT_XML_FORMATTER
- save_options = formatter._optionsForEncoding(encoding)
+ if serializer is None:
+ serializer = __DEFAULT_XML_SERIALIZER
+ save_options = serializer._optionsForEncoding(encoding)
if python.PyString_Check(f) or python.PyUnicode_Check(f):
filename = _utf8(f)
@@ -317,8 +315,8 @@
else:
raise TypeError, "File or filename expected, got '%s'" % type(f)
- formatter._setupCharacterEscaping(save_ctxt, encoding)
- formatter._saveDocNodeAndClose(save_ctxt, element._c_node, add_tail)
+ serializer._setupCharacterEscaping(save_ctxt, encoding)
+ serializer._saveDocNodeAndClose(save_ctxt, element._c_node, add_tail)
if writer is not None:
writer._exc_context._raise_if_stored()
Modified: lxml/branch/xmlsave/src/lxml/xmlerror.pxi
==============================================================================
--- lxml/branch/xmlsave/src/lxml/xmlerror.pxi (original)
+++ lxml/branch/xmlsave/src/lxml/xmlerror.pxi Mon May 22 15:12:14 2006
@@ -9,8 +9,10 @@
Note that this log is already bounded to a fixed size."""
__GLOBAL_ERROR_LOG.clear()
-def initThreadLogging():
- "Setup logging for the current thread."
+cdef void _initThreadLogging():
+ "Setup logging for the current thread. Called from etree.initThread()."
+ # switch on line number reporting
+ xmlparser.xmlLineNumbersDefault(1)
_logLibxmlErrors()
try:
_logLibxsltErrors()
@@ -18,7 +20,6 @@
# compiled without libxslt
pass
-
# Logging classes
cdef class _LogEntry:
@@ -339,12 +340,6 @@
xmlerror.xmlSetGenericErrorFunc(NULL, _nullGenericErrorFunc)
xmlerror.xmlSetStructuredErrorFunc(NULL, _receiveError)
-# init global logging
-initThreadLogging()
-
-# switch on line number reporting
-xmlparser.xmlLineNumbersDefault(1)
-
################################################################################
## CONSTANTS FROM "xmlerror.pxd"
################################################################################
From scoder at codespeak.net Mon May 22 16:29:41 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Mon, 22 May 2006 16:29:41 +0200 (CEST)
Subject: [Lxml-checkins] r27599 - lxml/trunk/src/lxml/tests
Message-ID: <20060522142941.BA2C510071@code0.codespeak.net>
Author: scoder
Date: Mon May 22 16:29:40 2006
New Revision: 27599
Modified:
lxml/trunk/src/lxml/tests/test_elementtree.py
lxml/trunk/src/lxml/tests/test_etree.py
Log:
some cleanup in test cases, new test cases merged in from xmlsave branch
Modified: lxml/trunk/src/lxml/tests/test_elementtree.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_elementtree.py (original)
+++ lxml/trunk/src/lxml/tests/test_elementtree.py Mon May 22 16:29:40 2006
@@ -9,7 +9,7 @@
"""
import unittest, doctest
-import os, shutil, tempfile, copy
+import os, re, shutil, tempfile, copy
from common_imports import StringIO, etree, ElementTree, HelperTestCase, fileInTestDir, canonicalize
@@ -187,6 +187,31 @@
self.assertEquals(None, root.text)
self.assertEquals('One', root[0].text)
+ def test_text_escape_in(self):
+ ElementTree = self.etree.ElementTree
+
+ f = StringIO('<a>This is &gt; than a text</a>')
+ doc = ElementTree(file=f)
+ root = doc.getroot()
+ self.assertEquals('This is > than a text', root.text)
+
+ def test_text_escape_out(self):
+ Element = self.etree.Element
+
+ a = Element("a")
+ a.text = "<>&"
+ self.assertXML('<a>&lt;&gt;&amp;</a>',
+ a)
+
+ def test_text_escape_tostring(self):
+ tostring = self.etree.tostring
+ Element = self.etree.Element
+
+ a = Element("a")
+ a.text = "<>&"
+ self.assertEquals('<a>&lt;&gt;&amp;</a>',
+ tostring(a))
+
def test_tail(self):
ElementTree = self.etree.ElementTree
@@ -706,7 +731,7 @@
a = Element('a')
a.append(Comment('foo'))
- self.assertEqual(a[0].text, 'foo')
+ self.assertEquals(a[0].text, 'foo')
def test_comment_text(self):
Element = self.etree.Element
@@ -715,10 +740,10 @@
a = Element('a')
a.append(Comment('foo'))
- self.assertEqual(a[0].text, 'foo')
+ self.assertEquals(a[0].text, 'foo')
a[0].text = "TEST"
- self.assertEqual(a[0].text, 'TEST')
+ self.assertEquals(a[0].text, 'TEST')
def test_comment_whitespace(self):
Element = self.etree.Element
@@ -727,7 +752,7 @@
a = Element('a')
a.append(Comment(' foo '))
- self.assertEqual(a[0].text, ' foo ')
+ self.assertEquals(a[0].text, ' foo ')
def test_comment_nonsense(self):
Comment = self.etree.Comment
@@ -1684,7 +1709,7 @@
u'<a>Søk på nettet</a>'.encode('UTF-8'),
a, 'utf-8')
- def test_encoding2(self):
+ def test_encoding_exact(self):
ElementTree = self.etree.ElementTree
Element = self.etree.Element
@@ -1694,8 +1719,27 @@
f = StringIO()
tree = ElementTree(element=a)
tree.write(f, 'utf-8')
- self.assertEqual(u'<a>Søk på nettet</a>'.encode('UTF-8'),
- f.getvalue())
+ self.assertEquals(u'<a>Søk på nettet</a>'.encode('UTF-8'),
+ f.getvalue())
+
+ def test_encoding_latin1(self):
+ ElementTree = self.etree.ElementTree
+ Element = self.etree.Element
+
+ a = Element('a')
+ a.text = u'Søk på nettet'
+
+ f = StringIO()
+ tree = ElementTree(element=a)
+ tree.write(f, 'iso-8859-1')
+ result = f.getvalue()
+ declaration = ""
+ self.assertEncodingDeclaration(result,'iso-8859-1')
+ result = result.split('?>', 1)[-1]
+ if result[0] == '\n':
+ result = result[1:]
+ self.assertEquals(u'<a>Søk på nettet</a>'.encode('iso-8859-1'),
+ result)
# raise error on wrong (left-over?) encoding declaration in unicode strings
def _test_wrong_unicode_encoding(self):
@@ -1724,7 +1768,7 @@
a = Element('a')
a.text = u'Søk på nettet'
- self.assertEqual(u'<a>Søk på nettet</a>'.encode('UTF-8'),
+ self.assertEquals(u'<a>Søk på nettet</a>'.encode('UTF-8'),
tostring(a, 'utf-8'))
def test_encoding_tostring_unknown(self):
@@ -1743,7 +1787,7 @@
a = Element('a')
b = SubElement(a, 'b')
b.text = u'Søk på nettet'
- self.assertEqual(u'<b>Søk på nettet</b>'.encode('UTF-8'),
+ self.assertEquals(u'<b>Søk på nettet</b>'.encode('UTF-8'),
tostring(b, 'utf-8'))
def test_encoding_tostring_sub_tail(self):
@@ -1755,7 +1799,7 @@
b = SubElement(a, 'b')
b.text = u'Søk på nettet'
b.tail = u'Søk'
- self.assertEqual(u'<b>Søk på nettet</b>Søk'.encode('UTF-8'),
+ self.assertEquals(u'<b>Søk på nettet</b>Søk'.encode('UTF-8'),
tostring(b, 'utf-8'))
def test_encoding_tostring_default_encoding(self):
@@ -1780,7 +1824,6 @@
b = SubElement(a, 'b')
b.text = u'Søk på nettet'
- # the same, just hex versus decimal
expected = '<b>S&#248;k p&#229; nettet</b>'
self.assertEquals(
expected,
@@ -1905,6 +1948,13 @@
"""
self.assertEquals(expected, self._writeElement(element, encoding))
self.assertEquals(expected, self._writeElementFile(element, encoding))
+
+ def assertEncodingDeclaration(self, result, encoding):
+ "Checks if the result XML byte string specifies the encoding."
+ has_encoding = re.compile(r"<\?xml[^>]+ encoding=[\"']([^\"']+)[\"']").match
+ self.assert_(has_encoding(result))
+ result_encoding = has_encoding(result).group(1)
+ self.assertEquals(result_encoding.upper(), encoding.upper())
def _rootstring(self, tree):
return self.etree.tostring(tree.getroot()).replace(' ', '').replace('\n', '')
Modified: lxml/trunk/src/lxml/tests/test_etree.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_etree.py (original)
+++ lxml/trunk/src/lxml/tests/test_etree.py Mon May 22 16:29:40 2006
@@ -599,7 +599,7 @@
data = f.getvalue()
return canonicalize(data)
-
+
class ETreeXIncludeTestCase(HelperTestCase):
def test_xinclude(self):
tree = etree.parse(fileInTestDir('test_xinclude.xml'))
From scoder at codespeak.net Mon May 22 16:30:35 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Mon, 22 May 2006 16:30:35 +0200 (CEST)
Subject: [Lxml-checkins] r27600 - in lxml/trunk: . src/lxml
Message-ID: <20060522143035.ED66E10071@code0.codespeak.net>
Author: scoder
Date: Mon May 22 16:30:34 2006
New Revision: 27600
Modified:
lxml/trunk/CHANGES.txt
lxml/trunk/src/lxml/etree.pyx
lxml/trunk/src/lxml/serializer.pxi
lxml/trunk/src/lxml/xmlerror.pxi
Log:
cleanup for thread setup: initThread() instead of initThreadLogging(), xml_declaration keyword in ET.write()
Modified: lxml/trunk/CHANGES.txt
==============================================================================
--- lxml/trunk/CHANGES.txt (original)
+++ lxml/trunk/CHANGES.txt Mon May 22 16:30:34 2006
@@ -17,6 +17,9 @@
Bugs fixed
----------
+* Removed public function ``initThreadLogging()``, replaced by more general
+ ``initThread()`` which fixes a number of setup problems in threads
+
* Memory leak when using iconv encoders in tostring/write
* Deep copying Elements and ElementTrees maintains the document information
Modified: lxml/trunk/src/lxml/etree.pyx
==============================================================================
--- lxml/trunk/src/lxml/etree.pyx (original)
+++ lxml/trunk/src/lxml/etree.pyx Mon May 22 16:30:34 2006
@@ -33,6 +33,11 @@
# make the compiled-in debug state publicly available
DEBUG = __DEBUG
+def initThread():
+ "Call this method to set up the library from within a new thread."
+ _initThreadLogging()
+ tree.xmlKeepBlanksDefault(0)
+
# Error superclass for ElementTree compatibility
class Error(Exception):
pass
@@ -361,22 +366,28 @@
def __get__(self):
return DocInfo(self._doc)
- def write(self, file, encoding=None, pretty_print=False):
+ def write(self, file, encoding=None,
+ pretty_print=False, xml_declaration=None):
"""Write the tree to a file or file-like object.
- Defaults to ASCII encoding.
+ Defaults to ASCII encoding and writing a declaration as needed.
"""
+ cdef int c_write_declaration
self._assertHasRoot()
# suppress decl. in default case (purely for ElementTree compatibility)
- if encoding is None:
+ if xml_declaration is not None:
+ c_write_declaration = bool(xml_declaration)
+ if encoding is None:
+ encoding = 'ASCII'
+ elif encoding is None:
encoding = 'ASCII'
- write_declaration = 0
+ c_write_declaration = 0
else:
encoding = encoding.upper()
- write_declaration = encoding not in \
- ('US-ASCII', 'ASCII', 'UTF8', 'UTF-8')
+ c_write_declaration = encoding not in \
+ ('US-ASCII', 'ASCII', 'UTF8', 'UTF-8')
_tofilelike(file, self._context_node, encoding,
- write_declaration, bool(pretty_print))
+ c_write_declaration, bool(pretty_print))
def getiterator(self, tag=None):
root = self.getroot()
@@ -1507,3 +1518,6 @@
include "relaxng.pxi" # RelaxNG
include "xmlschema.pxi" # XMLSchema
+
+# configure main thread
+initThread()
Modified: lxml/trunk/src/lxml/serializer.pxi
==============================================================================
--- lxml/trunk/src/lxml/serializer.pxi (original)
+++ lxml/trunk/src/lxml/serializer.pxi Mon May 22 16:30:34 2006
@@ -1,7 +1,5 @@
# XML serialization and output functions
-tree.xmlKeepBlanksDefault(0)
-
cdef _tostring(_NodeBase element, encoding,
int write_xml_declaration, int pretty_print):
"Serialize an element to an encoded string representation of its XML tree."
Modified: lxml/trunk/src/lxml/xmlerror.pxi
==============================================================================
--- lxml/trunk/src/lxml/xmlerror.pxi (original)
+++ lxml/trunk/src/lxml/xmlerror.pxi Mon May 22 16:30:34 2006
@@ -9,8 +9,10 @@
Note that this log is already bounded to a fixed size."""
__GLOBAL_ERROR_LOG.clear()
-def initThreadLogging():
- "Setup logging for the current thread."
+cdef void _initThreadLogging():
+ "Setup logging for the current thread. Called from etree.initThread()."
+ # switch on line number reporting
+ xmlparser.xmlLineNumbersDefault(1)
_logLibxmlErrors()
try:
_logLibxsltErrors()
@@ -339,12 +341,6 @@
xmlerror.xmlSetGenericErrorFunc(NULL, _nullGenericErrorFunc)
xmlerror.xmlSetStructuredErrorFunc(NULL, _receiveError)
-# init global logging
-initThreadLogging()
-
-# switch on line number reporting
-xmlparser.xmlLineNumbersDefault(1)
-
################################################################################
## CONSTANTS FROM "xmlerror.pxd"
################################################################################
From scoder at codespeak.net Tue May 23 08:51:08 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Tue, 23 May 2006 08:51:08 +0200 (CEST)
Subject: [Lxml-checkins] r27612 - in lxml/trunk/src/lxml: . tests
Message-ID: <20060523065108.408C410064@code0.codespeak.net>
Author: scoder
Date: Tue May 23 08:51:04 2006
New Revision: 27612
Modified:
lxml/trunk/src/lxml/etree.pyx
lxml/trunk/src/lxml/parser.pxi
lxml/trunk/src/lxml/tests/test_etree.py
Log:
let public API functions raise TypeError on the 'parser' argument rather than type checking later (why bother if Pyrex does it for us)
Modified: lxml/trunk/src/lxml/etree.pyx
==============================================================================
--- lxml/trunk/src/lxml/etree.pyx (original)
+++ lxml/trunk/src/lxml/etree.pyx Tue May 23 08:51:04 2006
@@ -270,7 +270,7 @@
if node_ns_utf is not None:
self._setNodeNs(c_node, node_ns_utf)
-cdef _Document _documentFactory(xmlDoc* c_doc, parser):
+cdef _Document _documentFactory(xmlDoc* c_doc, _BaseParser parser):
cdef _Document result
result = _Document()
result._c_doc = c_doc
@@ -348,7 +348,7 @@
assert self._context_node is not None, \
"ElementTree not initialized, missing root"
- def parse(self, source, parser=None):
+ def parse(self, source, _BaseParser parser=None):
"""Updates self with the content of source and returns its root
"""
self._doc = _parseDocument(source, parser)
@@ -1363,7 +1363,7 @@
_initNodeAttributes(c_node, doc, attrib, _extra)
return _elementFactory(doc, c_node)
-def ElementTree(_Element element=None, file=None, parser=None):
+def ElementTree(_Element element=None, file=None, _BaseParser parser=None):
cdef xmlNode* c_next
cdef xmlNode* c_node
cdef xmlNode* c_node_copy
@@ -1461,7 +1461,7 @@
else:
raise TypeError, "Type '%s' cannot be serialized." % type(element_or_tree)
-def parse(source, parser=None):
+def parse(source, _BaseParser parser=None):
"""Return an ElementTree object loaded with source elements. If no parser
is provided as second argument, the default parser is used.
"""
Modified: lxml/trunk/src/lxml/parser.pxi
==============================================================================
--- lxml/trunk/src/lxml/parser.pxi (original)
+++ lxml/trunk/src/lxml/parser.pxi Tue May 23 08:51:04 2006
@@ -517,7 +517,7 @@
cdef _BaseParser __DEFAULT_PARSER
__DEFAULT_PARSER = __DEFAULT_XML_PARSER
-def set_default_parser(parser=None):
+def set_default_parser(_BaseParser parser=None):
"""Set a default parser. This parser is used globally whenever no parser
is supplied to the various parse functions of the lxml API. If this
function is called without a parser (or if it is None), the default parser
@@ -530,10 +530,8 @@
global __DEFAULT_PARSER
if parser is None:
__DEFAULT_PARSER = __DEFAULT_XML_PARSER
- elif isinstance(parser, _BaseParser):
- __DEFAULT_PARSER = parser
else:
- raise TypeError, "Invalid parser"
+ __DEFAULT_PARSER = parser
def get_default_parser():
return __DEFAULT_PARSER
@@ -580,12 +578,10 @@
## helper functions for document creation
############################################################
-cdef xmlDoc* _parseDoc(text, filename, parser) except NULL:
+cdef xmlDoc* _parseDoc(text, filename, _BaseParser parser) except NULL:
cdef char* c_filename
if parser is None:
parser = __DEFAULT_PARSER
- elif not isinstance(parser, _BaseParser):
- raise TypeError, "invalid parser"
__GLOBAL_PARSER_CONTEXT._initParser()
if not filename:
c_filename = NULL
@@ -596,20 +592,17 @@
else:
return (<_BaseParser>parser)._parseDoc(_cstr(text), c_filename)
-cdef xmlDoc* _parseDocFromFile(filename, parser) except NULL:
+cdef xmlDoc* _parseDocFromFile(filename, _BaseParser parser) except NULL:
if parser is None:
parser = __DEFAULT_PARSER
- elif not isinstance(parser, _BaseParser):
- raise TypeError, "invalid parser"
__GLOBAL_PARSER_CONTEXT._initParser()
return (<_BaseParser>parser)._parseDocFromFile(_cstr(filename))
-cdef xmlDoc* _parseDocFromFilelike(source, filename, parser) except NULL:
+cdef xmlDoc* _parseDocFromFilelike(source, filename,
+ _BaseParser parser) except NULL:
cdef char* c_filename
if parser is None:
parser = __DEFAULT_PARSER
- elif not isinstance(parser, _BaseParser):
- raise TypeError, "invalid parser"
__GLOBAL_PARSER_CONTEXT._initParser()
if not filename:
c_filename = NULL
@@ -654,7 +647,7 @@
## (here we convert to UTF-8)
############################################################
-cdef _Document _parseDocument(source, parser):
+cdef _Document _parseDocument(source, _BaseParser parser):
cdef xmlDoc* c_doc
filename = _getFilenameForFile(source)
if hasattr(source, 'getvalue') and hasattr(source, 'tell'):
@@ -673,7 +666,7 @@
c_doc = _parseDocFromFile(_utf8(filename), parser)
return _documentFactory(c_doc, parser)
-cdef _Document _parseMemoryDocument(text, url, parser):
+cdef _Document _parseMemoryDocument(text, url, _BaseParser parser):
cdef xmlDoc* c_doc
if python.PyUnicode_Check(text):
# pass native unicode only if libxml2 can handle it
@@ -686,7 +679,7 @@
c_doc = _parseDoc(text, url, parser)
return _documentFactory(c_doc, parser)
-cdef _Document _parseFilelikeDocument(source, url, parser):
+cdef _Document _parseFilelikeDocument(source, url, _BaseParser parser):
cdef xmlDoc* c_doc
if url is not None:
url = _utf8(url)
Modified: lxml/trunk/src/lxml/tests/test_etree.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_etree.py (original)
+++ lxml/trunk/src/lxml/tests/test_etree.py Tue May 23 08:51:04 2006
@@ -35,6 +35,10 @@
self.assertRaises(SyntaxError, parse, f)
f.close()
+ def test_parse_parser_type_error(self):
+ parse = self.etree.parse
+ self.assertRaises(TypeError, parse, 'notthere.xml', object())
+
def test_parse_error_logging(self):
parse = self.etree.parse
# from StringIO
From scoder at codespeak.net Fri May 26 09:04:40 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Fri, 26 May 2006 09:04:40 +0200 (CEST)
Subject: [Lxml-checkins] r27690 - lxml/trunk
Message-ID: <20060526070440.5F2D910053@code0.codespeak.net>
Author: scoder
Date: Fri May 26 09:04:36 2006
New Revision: 27690
Modified:
lxml/trunk/CHANGES.txt
Log:
typo
Modified: lxml/trunk/CHANGES.txt
==============================================================================
--- lxml/trunk/CHANGES.txt (original)
+++ lxml/trunk/CHANGES.txt Fri May 26 09:04:36 2006
@@ -35,7 +35,7 @@
Features added
--------------
-* Formatted output via ``pretty_print`` keyword to serialization functions
+* Formatted output via ``pretty_print`` keyword in serialization functions
* XSLT can block access to file system and network via ``XSLTAccessControl``
From scoder at codespeak.net Fri May 26 09:35:55 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Fri, 26 May 2006 09:35:55 +0200 (CEST)
Subject: [Lxml-checkins] r27691 - in lxml/trunk: . src/lxml src/lxml/tests
Message-ID: <20060526073555.C6DDC10061@code0.codespeak.net>
Author: scoder
Date: Fri May 26 09:35:30 2006
New Revision: 27691
Modified:
lxml/trunk/CHANGES.txt
lxml/trunk/src/lxml/apihelpers.pxi
lxml/trunk/src/lxml/relaxng.pxi
lxml/trunk/src/lxml/tests/test_elementtree.py
lxml/trunk/src/lxml/tests/test_relaxng.py
lxml/trunk/src/lxml/tests/test_xmlschema.py
lxml/trunk/src/lxml/tests/test_xpathevaluator.py
lxml/trunk/src/lxml/tests/test_xslt.py
lxml/trunk/src/lxml/xmlschema.pxi
lxml/trunk/src/lxml/xpath.pxi
lxml/trunk/src/lxml/xslt.pxi
Log:
fix crashes when calling API functions with uninitialized ElementTree objects
Modified: lxml/trunk/CHANGES.txt
==============================================================================
--- lxml/trunk/CHANGES.txt (original)
+++ lxml/trunk/CHANGES.txt Fri May 26 09:35:30 2006
@@ -17,6 +17,9 @@
Bugs fixed
----------
+* Crashes when calling XSLT, RelaxNG, etc. with uninitialized ElementTree
+ objects
+
* Removed public function ``initThreadLogging()``, replaced by more general
``initThread()`` which fixes a number of setup problems in threads
Modified: lxml/trunk/src/lxml/apihelpers.pxi
==============================================================================
--- lxml/trunk/src/lxml/apihelpers.pxi (original)
+++ lxml/trunk/src/lxml/apihelpers.pxi Fri May 26 09:35:30 2006
@@ -10,16 +10,49 @@
c_child = c_child.next
cdef _Document _documentOrRaise(object input):
+ """Call this to get the document of a _Document, _ElementTree or _NodeBase
+ object, or to raise an exception if it can't be determined.
+
+ Should be used in all API functions for consistency.
+ """
cdef _Document doc
- doc = _documentOf(input)
- if doc is None:
+ if isinstance(input, _ElementTree):
+ doc = (<_ElementTree>input)._doc
+ elif isinstance(input, _NodeBase):
+ doc = (<_NodeBase>input)._doc
+ elif isinstance(input, _Document):
+ doc = <_Document>input
+ else:
raise TypeError, "Invalid input object: %s" % type(input)
+ if doc is None:
+ raise ValueError, "Input object has no document: %s" % type(input)
else:
return doc
+cdef _NodeBase _rootNodeOrRaise(object input):
+ """Call this to get the root node of a _Document, _ElementTree or
+ _NodeBase object, or to raise an exception if it can't be determined.
+
+ Should be used in all API functions for consistency.
+ """
+ cdef _NodeBase node
+ if isinstance(input, _ElementTree):
+ node = (<_ElementTree>input)._context_node
+ elif isinstance(input, _NodeBase):
+ node = <_NodeBase>input
+ elif isinstance(input, _Document):
+ node = (<_Document>input).getroot()
+ else:
+ raise TypeError, "Invalid input object: %s" % type(input)
+ if node is None:
+ raise ValueError, "Input object has no element: %s" % type(input)
+ else:
+ return node
+
cdef _Document _documentOf(object input):
# call this to get the document of a
# _Document, _ElementTree or _NodeBase object
+ # may return None!
if isinstance(input, _ElementTree):
return (<_ElementTree>input)._doc
elif isinstance(input, _NodeBase):
@@ -32,6 +65,7 @@
cdef _NodeBase _rootNodeOf(object input):
# call this to get the root node of a
# _Document, _ElementTree or _NodeBase object
+ # may return None!
if isinstance(input, _ElementTree):
return (<_ElementTree>input)._context_node
elif isinstance(input, _NodeBase):
Modified: lxml/trunk/src/lxml/relaxng.pxi
==============================================================================
--- lxml/trunk/src/lxml/relaxng.pxi (original)
+++ lxml/trunk/src/lxml/relaxng.pxi Fri May 26 09:35:30 2006
@@ -28,7 +28,7 @@
fake_c_doc = NULL
if etree is not None:
doc = _documentOrRaise(etree)
- root_node = _rootNodeOf(etree)
+ root_node = _rootNodeOrRaise(etree)
c_node = root_node._c_node
# work around for libxml2 bug if document is not RNG at all
if c_node.ns is NULL or c_node.ns.href is NULL or \
@@ -78,7 +78,7 @@
cdef int ret
doc = _documentOrRaise(etree)
- root_node = _rootNodeOf(etree)
+ root_node = _rootNodeOrRaise(etree)
self._error_log.connect()
valid_ctxt = relaxng.xmlRelaxNGNewValidCtxt(self._c_schema)
Modified: lxml/trunk/src/lxml/tests/test_elementtree.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_elementtree.py (original)
+++ lxml/trunk/src/lxml/tests/test_elementtree.py Fri May 26 09:35:30 2006
@@ -1678,7 +1678,7 @@
def test_parse_file_nonexistent(self):
parse = self.etree.parse
self.assertRaises(IOError, parse, fileInTestDir('notthere.xml'))
-
+
def test_parse_file_object(self):
parse = self.etree.parse
# from file object
Modified: lxml/trunk/src/lxml/tests/test_relaxng.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_relaxng.py (original)
+++ lxml/trunk/src/lxml/tests/test_relaxng.py Fri May 26 09:35:30 2006
@@ -25,6 +25,9 @@
self.assert_(schema.validate(tree_valid))
self.assert_(not schema.validate(tree_invalid))
+ def test_relaxng_elementtree_error(self):
+ self.assertRaises(ValueError, etree.RelaxNG, etree.ElementTree())
+
def test_relaxng_error(self):
tree_invalid = self.parse(' ')
schema = self.parse('''\
Modified: lxml/trunk/src/lxml/tests/test_xmlschema.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_xmlschema.py (original)
+++ lxml/trunk/src/lxml/tests/test_xmlschema.py Fri May 26 09:35:30 2006
@@ -26,6 +26,9 @@
self.assert_(schema.validate(tree_valid))
self.assert_(not schema.validate(tree_invalid))
+ def test_xmlschema_elementtree_error(self):
+ self.assertRaises(ValueError, etree.XMLSchema, etree.ElementTree())
+
def test_xmlschema_invalid_schema1(self):
schema = self.parse('''\
Modified: lxml/trunk/src/lxml/tests/test_xpathevaluator.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_xpathevaluator.py (original)
+++ lxml/trunk/src/lxml/tests/test_xpathevaluator.py Fri May 26 09:35:30 2006
@@ -300,6 +300,9 @@
def test_xpath_compile_error(self):
self.assertRaises(SyntaxError, etree.XPath, '\\fad')
+ def test_xpath_elementtree_error(self):
+ self.assertRaises(ValueError, etree.XPath('*'), etree.ElementTree())
+
class ETreeETXPathClassTestCase(HelperTestCase):
"Tests for the ETXPath class"
def test_xpath_compile_ns(self):
Modified: lxml/trunk/src/lxml/tests/test_xslt.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_xslt.py (original)
+++ lxml/trunk/src/lxml/tests/test_xslt.py Fri May 26 09:35:30 2006
@@ -30,6 +30,9 @@
''',
st.tostring(res))
+ def test_xslt_elementtree_error(self):
+ self.assertRaises(ValueError, etree.XSLT, etree.ElementTree())
+
def test_xslt_utf8(self):
tree = self.parse(u'\uF8D2 \uF8D2 ')
style = self.parse('''\
Modified: lxml/trunk/src/lxml/xmlschema.pxi
==============================================================================
--- lxml/trunk/src/lxml/xmlschema.pxi (original)
+++ lxml/trunk/src/lxml/xmlschema.pxi Fri May 26 09:35:30 2006
@@ -26,7 +26,7 @@
self._c_schema = NULL
if etree is not None:
doc = _documentOrRaise(etree)
- root_node = _rootNodeOf(etree)
+ root_node = _rootNodeOrRaise(etree)
# work around for libxml2 bug if document is not XML schema at all
c_node = root_node._c_node
@@ -73,7 +73,7 @@
cdef int ret
doc = _documentOrRaise(etree)
- root_node = _rootNodeOf(etree)
+ root_node = _rootNodeOrRaise(etree)
self._error_log.connect()
valid_ctxt = xmlschema.xmlSchemaNewValidCtxt(self._c_schema)
Modified: lxml/trunk/src/lxml/xpath.pxi
==============================================================================
--- lxml/trunk/src/lxml/xpath.pxi (original)
+++ lxml/trunk/src/lxml/xpath.pxi Fri May 26 09:35:30 2006
@@ -179,7 +179,7 @@
cdef _XPathContext context
document = _documentOrRaise(_etree_or_element)
- element = _rootNodeOf(_etree_or_element)
+ element = _rootNodeOrRaise(_etree_or_element)
xpathCtxt = self._xpathCtxt
xpathCtxt.doc = document._c_doc
Modified: lxml/trunk/src/lxml/xslt.pxi
==============================================================================
--- lxml/trunk/src/lxml/xslt.pxi (original)
+++ lxml/trunk/src/lxml/xslt.pxi Fri May 26 09:35:30 2006
@@ -230,7 +230,7 @@
cdef _NodeBase root_node
doc = _documentOrRaise(xslt_input)
- root_node = _rootNodeOf(xslt_input)
+ root_node = _rootNodeOrRaise(xslt_input)
# set access control or raise TypeError
self._access_control = access_control
@@ -287,7 +287,7 @@
cdef Py_ssize_t i, kw_count
input_doc = _documentOrRaise(_input)
- root_node = _rootNodeOf(_input)
+ root_node = _rootNodeOrRaise(_input)
resolver_context = _XSLTResolverContext(input_doc._parser)
resolver_context._c_style_doc = self._xslt_resolver_context._c_style_doc
From scoder at codespeak.net Fri May 26 09:45:39 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Fri, 26 May 2006 09:45:39 +0200 (CEST)
Subject: [Lxml-checkins] r27692 - lxml/trunk
Message-ID: <20060526074539.5E60110068@code0.codespeak.net>
Author: scoder
Date: Fri May 26 09:45:37 2006
New Revision: 27692
Modified:
lxml/trunk/CHANGES.txt
Log:
cleanup in CHANGES.txt
Modified: lxml/trunk/CHANGES.txt
==============================================================================
--- lxml/trunk/CHANGES.txt (original)
+++ lxml/trunk/CHANGES.txt Fri May 26 09:45:37 2006
@@ -7,12 +7,12 @@
Features added
--------------
-* Element.getpath() returns an XPath expression to find the node in the tree
- structure
+* Element.getpath() returns a simple XPath expression to find the node in the
+ tree structure
-* Error logs now have a ``last_error`` attribute for convenience
+* Error logs have a ``last_error`` attribute for convenience
-* Comment texts can now be changed through the API
+* Comment texts can be changed through the API
Bugs fixed
----------
@@ -27,7 +27,7 @@
* Deep copying Elements and ElementTrees maintains the document information
-* Serialization functions now raise LookupError for unknown encodings
+* Serialization functions raise LookupError for unknown encodings
* Memory deallocation crash resulting from deep copying elements
@@ -55,8 +55,8 @@
* Parsing a unicode string no longer copies the string (reduced memory
footprint)
-* Parsing file-like objects now reads chunks rather than the whole file
- (reduced memory footprint)
+* Parsing file-like objects reads chunks rather than the whole file (reduced
+ memory footprint)
* Parsing StringIO objects from the start avoids copying the string (reduced
memory footprint)
@@ -69,7 +69,7 @@
* Better error messages in parser exceptions
-* Error reporting now also works in XSLT
+* Error reporting also works in XSLT
* Support for custom document loaders (URI resolvers) in parsers and XSLT,
resolvers are registered at parser level
@@ -96,7 +96,7 @@
* Element/SubElement failed to set attribute namespaces from passed ``attrib``
dictionary
-* ``tostring()`` now adds an XML declaration for non-ASCII encodings
+* ``tostring()`` adds an XML declaration for non-ASCII encodings
* ``tostring()`` failed to serialize encodings that contain 0-bytes
@@ -111,14 +111,14 @@
Features added
--------------
-* Speedup for Element.makeelement(): the new element now reuses the original
+* Speedup for Element.makeelement(): the new element reuses the original
libxml2 document instead of creating a new empty one
* Speedup for reversed() iteration over element children (Py2.4+ only)
* ElementTree compatible QName class
-* RelaxNG and XMLSchema now accept any Element, not only ElementTrees
+* RelaxNG and XMLSchema accept any Element, not only ElementTrees
Bugs fixed
----------
@@ -140,7 +140,7 @@
* lxml.sax.ElementTreeContentHandler checks closing elements and raises
SaxError on mismatch
-* lxml.sax.ElementTreeContentHandler now supports namespace-less SAX events
+* lxml.sax.ElementTreeContentHandler supports namespace-less SAX events
(startElement, endElement) and defaults to empty attributes (keyword
argument)
@@ -209,8 +209,8 @@
* ElementTree objects no longer interfere, Elements can be root of different
ElementTrees at the same time
-* document('') now works in XSLT documents read from files (in-memory
- documents cannot support this due to libxslt deficiencies)
+* document('') works in XSLT documents read from files (in-memory documents
+ cannot support this due to libxslt deficiencies)
0.8 (2005-11-03)
================
@@ -225,7 +225,7 @@
that it works than if copy.copy() isn't supported at all.
* Increased compatibility with (c)ElementTree; .parse() on ElementTree is
- now supported and parsing of gzipped XML files works.
+ supported and parsing of gzipped XML files works.
* implemented index() on elements, allowing one to find the index of a
SubElement.
@@ -249,11 +249,10 @@
* Fixed error with uncaught exception in Pyrex code.
-* Calling lxml.etree.fromstring('') now throws XMLSyntaxError instead
- of a segfault.
+* Calling lxml.etree.fromstring('') throws XMLSyntaxError instead of a
+ segfault.
-* has_key() now works on attrib. 'in' tests also work correctly now on
- attrib.
+* has_key() works on attrib. 'in' tests also work correctly on attrib.
* INSTALL.txt was saying 2.2.16 instead of 2.6.16 as a supported
libxml2 version, as it should.
@@ -267,8 +266,8 @@
Features added
--------------
-* parameters (XPath expressions) can now be passed to XSLT using
- keyword parameters.
+* parameters (XPath expressions) can be passed to XSLT using keyword
+ parameters.
* Simple XInclude support. Calling the xinclude() method on a tree
will process any XInclude statements in the document.
@@ -315,16 +314,15 @@
* Can pass None to 'dump()' without segfaults.
-* tostring() now works properly for non-root elements as well.
+* tostring() works properly for non-root elements as well.
* Cleaned out the tostring() method so it should handle encoding
correctly.
-* Cleaned out the ElementTree.write() method so it should handle
- encoding correctly. Writing directly to a file should also be faster
- now, as there is no need to go through a Python string in that
- case. Made sure the test cases test both serializing to StringIO as
- well as serializing to a real file.
+* Cleaned out the ElementTree.write() method so it should handle encoding
+ correctly. Writing directly to a file should also be faster, as there is no
+ need to go through a Python string in that case. Made sure the test cases
+ test both serializing to StringIO as well as serializing to a real file.
0.6 (2005-05-14)
================
From scoder at codespeak.net Fri May 26 10:20:37 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Fri, 26 May 2006 10:20:37 +0200 (CEST)
Subject: [Lxml-checkins] r27697 - in lxml/trunk: . doc src/lxml
Message-ID: <20060526082037.07E841007C@code0.codespeak.net>
Author: scoder
Date: Fri May 26 10:20:33 2006
New Revision: 27697
Modified:
lxml/trunk/CHANGES.txt
lxml/trunk/doc/api.txt
lxml/trunk/src/lxml/etree.pyx
Log:
moved getpath() method from _Element to _ElementTree as we are dealing with absolute paths, so access through ElementTree makes more sense
Modified: lxml/trunk/CHANGES.txt
==============================================================================
--- lxml/trunk/CHANGES.txt (original)
+++ lxml/trunk/CHANGES.txt Fri May 26 10:20:33 2006
@@ -7,8 +7,8 @@
Features added
--------------
-* Element.getpath() returns a simple XPath expression to find the node in the
- tree structure
+* ElementTree.getpath(element) returns a simple, absolute XPath expression to
+ find the element in the tree structure
* Error logs have a ``last_error`` attribute for convenience
Modified: lxml/trunk/doc/api.txt
==============================================================================
--- lxml/trunk/doc/api.txt (original)
+++ lxml/trunk/doc/api.txt Fri May 26 10:20:33 2006
@@ -244,19 +244,25 @@
>>> r[0].text
'Text'
-A related convenience method of Elements is ``getpath()``, which returns a
-structural XPath expression for the respective element::
+A related convenience method of ElementTree is ``getpath(element)``, which
+returns a structural XPath expression for an element::
>>> a = etree.Element("a")
>>> b = etree.SubElement(a, "b")
>>> c = etree.SubElement(a, "c")
>>> d1 = etree.SubElement(c, "d")
>>> d2 = etree.SubElement(c, "d")
- >>> print d2.getpath()
+
+ >>> tree = etree.ElementTree(a)
+ >>> print tree.getpath(d2)
/a/c/d[2]
- >>> a.xpath(d2.getpath()) == [d2]
+ >>> a.xpath(tree.getpath(d2)) == [d2]
True
+ >>> tree = etree.ElementTree(c)
+ >>> print tree.getpath(d2)
+ /c/d[2]
+
XSLT
----
Modified: lxml/trunk/src/lxml/etree.pyx
==============================================================================
--- lxml/trunk/src/lxml/etree.pyx (original)
+++ lxml/trunk/src/lxml/etree.pyx Fri May 26 10:20:33 2006
@@ -389,6 +389,20 @@
_tofilelike(file, self._context_node, encoding,
c_write_declaration, bool(pretty_print))
+ def getpath(self, _NodeBase element not None):
+ cdef xmlDoc* c_doc
+ cdef char* c_path
+ if element._doc is not self._doc:
+ raise ValueError, "Element is not in this tree."
+ c_doc = _fakeRootDoc(self._doc._c_doc, self._context_node._c_node)
+ c_path = tree.xmlGetNodePath(element._c_node)
+ _destroyFakeDoc(self._doc._c_doc, c_doc)
+ if c_path is NULL:
+ raise LxmlError, "Error creating node path."
+ path = c_path
+ tree.xmlFree(c_path)
+ return path
+
def getiterator(self, tag=None):
root = self.getroot()
if root is None:
@@ -900,15 +914,6 @@
return _elementFactory(self._doc, c_node)
return None
- def getpath(self):
- cdef char* c_path
- c_path = tree.xmlGetNodePath(self._c_node)
- if c_path is NULL:
- raise LxmlError, "Error creating node path."
- path = c_path
- tree.xmlFree(c_path)
- return path
-
def getiterator(self, tag=None):
return ElementDepthFirstIterator(self, tag)
From scoder at codespeak.net Fri May 26 11:48:44 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Fri, 26 May 2006 11:48:44 +0200 (CEST)
Subject: [Lxml-checkins] r27705 - lxml/trunk/src/lxml
Message-ID: <20060526094844.3E78F1007B@code0.codespeak.net>
Author: scoder
Date: Fri May 26 11:48:42 2006
New Revision: 27705
Modified:
lxml/trunk/src/lxml/apihelpers.pxi
lxml/trunk/src/lxml/proxy.pxi
Log:
moved _fakeRootDoc and _destroyFakeDoc to proxy.pxi: they are related as they change node._private and hook into the in-memory structure
Modified: lxml/trunk/src/lxml/apihelpers.pxi
==============================================================================
--- lxml/trunk/src/lxml/apihelpers.pxi (original)
+++ lxml/trunk/src/lxml/apihelpers.pxi Fri May 26 11:48:42 2006
@@ -75,56 +75,6 @@
else:
return None
-cdef xmlDoc* _fakeRootDoc(xmlDoc* c_base_doc, xmlNode* c_node):
- # build a temporary document that has the given node as root node
- # note that copy and original must not be modified during its lifetime!!
- # always call _destroyFakeDoc() after use!
- cdef xmlNode* c_child
- cdef xmlNode* c_root
- cdef xmlDoc* c_doc
- c_root = tree.xmlDocGetRootElement(c_base_doc)
- if c_root == c_node:
- # already the root node
- return c_base_doc
-
- c_doc = _copyDoc(c_base_doc, 0) # non recursive!
- c_root = tree.xmlDocCopyNode(c_node, c_doc, 2) # non recursive!
-
- c_root.children = c_node.children
- c_root.last = c_node.last
- c_root.next = c_root.prev = c_root.parent = NULL
-
- # store original node
- c_root._private = c_node
-
- # divert parent pointers of children
- c_child = c_root.children
- while c_child is not NULL:
- c_child.parent = c_root
- c_child = c_child.next
-
- c_doc.children = c_root
- return c_doc
-
-cdef void _destroyFakeDoc(xmlDoc* c_base_doc, xmlDoc* c_doc):
- # delete a temporary document
- cdef xmlNode* c_child
- cdef xmlNode* c_parent
- cdef xmlNode* c_root
- if c_doc != c_base_doc:
- c_root = tree.xmlDocGetRootElement(c_doc)
-
- # restore parent pointers of children
- c_parent = c_root._private
- c_child = c_root.children
- while c_child is not NULL:
- c_child.parent = c_parent
- c_child = c_child.next
-
- # prevent recursive removal of children
- c_root.children = c_root.last = c_root._private = NULL
- tree.xmlFreeDoc(c_doc)
-
cdef object _attributeValue(xmlNode* c_element, xmlNode* c_attrib_node):
cdef char* value
if c_attrib_node.ns is NULL or c_attrib_node.ns.href is NULL:
Modified: lxml/trunk/src/lxml/proxy.pxi
==============================================================================
--- lxml/trunk/src/lxml/proxy.pxi (original)
+++ lxml/trunk/src/lxml/proxy.pxi Fri May 26 11:48:42 2006
@@ -75,6 +75,58 @@
#print "Proxy:", proxy, "Proxy type:", proxy_type
assert 0, "Tried to unregister unknown proxy"
+################################################################################
+# temporarily make a node the root node of its document
+
+cdef xmlDoc* _fakeRootDoc(xmlDoc* c_base_doc, xmlNode* c_node):
+ # build a temporary document that has the given node as root node
+ # note that copy and original must not be modified during its lifetime!!
+ # always call _destroyFakeDoc() after use!
+ cdef xmlNode* c_child
+ cdef xmlNode* c_root
+ cdef xmlDoc* c_doc
+ c_root = tree.xmlDocGetRootElement(c_base_doc)
+ if c_root == c_node:
+ # already the root node
+ return c_base_doc
+
+ c_doc = _copyDoc(c_base_doc, 0) # non recursive!
+ c_root = tree.xmlDocCopyNode(c_node, c_doc, 2) # non recursive!
+
+ c_root.children = c_node.children
+ c_root.last = c_node.last
+ c_root.next = c_root.prev = c_root.parent = NULL
+
+ # store original node
+ c_root._private = c_node
+
+ # divert parent pointers of children
+ c_child = c_root.children
+ while c_child is not NULL:
+ c_child.parent = c_root
+ c_child = c_child.next
+
+ c_doc.children = c_root
+ return c_doc
+
+cdef void _destroyFakeDoc(xmlDoc* c_base_doc, xmlDoc* c_doc):
+ # delete a temporary document
+ cdef xmlNode* c_child
+ cdef xmlNode* c_parent
+ cdef xmlNode* c_root
+ if c_doc != c_base_doc:
+ c_root = tree.xmlDocGetRootElement(c_doc)
+
+ # restore parent pointers of children
+ c_parent = c_root._private
+ c_child = c_root.children
+ while c_child is not NULL:
+ c_child.parent = c_parent
+ c_child = c_child.next
+
+ # prevent recursive removal of children
+ c_root.children = c_root.last = c_root._private = NULL
+ tree.xmlFreeDoc(c_doc)
################################################################################
# support for freeing tree elements when proxy objects are destroyed
From scoder at codespeak.net Fri May 26 13:38:40 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Fri, 26 May 2006 13:38:40 +0200 (CEST)
Subject: [Lxml-checkins] r27724 - lxml/trunk/src/lxml
Message-ID: <20060526113840.3B2E01007C@code0.codespeak.net>
Author: scoder
Date: Fri May 26 13:38:18 2006
New Revision: 27724
Modified:
lxml/trunk/src/lxml/proxy.pxi
lxml/trunk/src/lxml/xslt.pxi
Log:
fixes in _fakeRootDoc(): store original root node in document rather than new root node to allow instantiating new root node
Modified: lxml/trunk/src/lxml/proxy.pxi
==============================================================================
--- lxml/trunk/src/lxml/proxy.pxi (original)
+++ lxml/trunk/src/lxml/proxy.pxi Fri May 26 13:38:18 2006
@@ -86,19 +86,20 @@
cdef xmlNode* c_root
cdef xmlDoc* c_doc
c_root = tree.xmlDocGetRootElement(c_base_doc)
- if c_root == c_node:
+ if c_root is c_node:
# already the root node
return c_base_doc
c_doc = _copyDoc(c_base_doc, 0) # non recursive!
c_root = tree.xmlDocCopyNode(c_node, c_doc, 2) # non recursive!
+ tree.xmlDocSetRootElement(c_doc, c_root)
c_root.children = c_node.children
c_root.last = c_node.last
c_root.next = c_root.prev = c_root.parent = NULL
# store original node
- c_root._private = c_node
+ c_doc._private = c_node
# divert parent pointers of children
c_child = c_root.children
@@ -118,14 +119,14 @@
c_root = tree.xmlDocGetRootElement(c_doc)
# restore parent pointers of children
- c_parent = c_root._private
+ c_parent = c_doc._private
c_child = c_root.children
while c_child is not NULL:
c_child.parent = c_parent
c_child = c_child.next
# prevent recursive removal of children
- c_root.children = c_root.last = c_root._private = NULL
+ c_root.children = c_root.last = NULL
tree.xmlFreeDoc(c_doc)
################################################################################
Modified: lxml/trunk/src/lxml/xslt.pxi
==============================================================================
--- lxml/trunk/src/lxml/xslt.pxi (original)
+++ lxml/trunk/src/lxml/xslt.pxi Fri May 26 13:38:18 2006
@@ -306,7 +306,7 @@
if self._access_control is not None:
self._access_control._register_in_context(transform_ctxt)
- ptemp = c_doc._private
+ ptemp = c_doc._private # store original _private pointer!
c_doc._private = resolver_context
kw_count = python.PyDict_Size(_kw)
From scoder at codespeak.net Fri May 26 15:35:07 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Fri, 26 May 2006 15:35:07 +0200 (CEST)
Subject: [Lxml-checkins] r27726 - in lxml/trunk: . doc src/lxml
src/lxml/tests
Message-ID: <20060526133507.CB88F1007C@code0.codespeak.net>
Author: scoder
Date: Fri May 26 15:35:02 2006
New Revision: 27726
Modified:
lxml/trunk/CHANGES.txt
lxml/trunk/doc/api.txt
lxml/trunk/src/lxml/etree.pyx
lxml/trunk/src/lxml/tests/test_xpathevaluator.py
lxml/trunk/src/lxml/xmlid.pxi
lxml/trunk/src/lxml/xpath.pxi
Log:
fix semantics of absolute XPath expressions in XPathDocumentEvaluator and ET.xpath() by using _fakeRootDoc(), raise exception on Element.xpath('/...') etc.
Modified: lxml/trunk/CHANGES.txt
==============================================================================
--- lxml/trunk/CHANGES.txt (original)
+++ lxml/trunk/CHANGES.txt Fri May 26 15:35:02 2006
@@ -17,6 +17,11 @@
Bugs fixed
----------
+* Running absolute XPath expressions on Elements now raises an exception in
+ most cases. Otherwise, the behaviour is explicitly marked as undefined.
+
+* Evaluating absolute XPath expressions ('/*') on an ElementTree could fail
+
* Crashes when calling XSLT, RelaxNG, etc. with uninitialized ElementTree
objects
Modified: lxml/trunk/doc/api.txt
==============================================================================
--- lxml/trunk/doc/api.txt (original)
+++ lxml/trunk/doc/api.txt Fri May 26 15:35:02 2006
@@ -244,8 +244,8 @@
>>> r[0].text
'Text'
-A related convenience method of ElementTree is ``getpath(element)``, which
-returns a structural XPath expression for an element::
+A related convenience method of ElementTree objects is ``getpath(element)``,
+which returns a structural, absolute XPath expression to find that element::
>>> a = etree.Element("a")
>>> b = etree.SubElement(a, "b")
@@ -253,16 +253,11 @@
>>> d1 = etree.SubElement(c, "d")
>>> d2 = etree.SubElement(c, "d")
- >>> tree = etree.ElementTree(a)
- >>> print tree.getpath(d2)
- /a/c/d[2]
- >>> a.xpath(tree.getpath(d2)) == [d2]
- True
-
>>> tree = etree.ElementTree(c)
>>> print tree.getpath(d2)
/c/d[2]
-
+ >>> tree.xpath(tree.getpath(d2)) == [d2]
+ True
XSLT
----
Modified: lxml/trunk/src/lxml/etree.pyx
==============================================================================
--- lxml/trunk/src/lxml/etree.pyx (original)
+++ lxml/trunk/src/lxml/etree.pyx Fri May 26 15:35:02 2006
@@ -447,7 +447,7 @@
XPathEvaluator directly.
"""
self._assertHasRoot()
- evaluator = XPathElementEvaluator(self._context_node, namespaces)
+ evaluator = XPathDocumentEvaluator(self, namespaces)
return evaluator.evaluate(_path, **_variables)
def xslt(self, _xslt, extensions=None, **_kw):
Modified: lxml/trunk/src/lxml/tests/test_xpathevaluator.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_xpathevaluator.py (original)
+++ lxml/trunk/src/lxml/tests/test_xpathevaluator.py Fri May 26 15:35:02 2006
@@ -74,8 +74,8 @@
c = root[0]
self.assertEquals([c[0], c[1]],
c.xpath('b'))
- self.assertEquals([c[0], c[1], root[1][0]],
- c.xpath('//b'))
+ self.assertEquals([c[0], c[1]],
+ c.xpath('.//b'))
def test_xpath_ns(self):
tree = self.parse(' ')
@@ -88,15 +88,41 @@
tree.xpath('//foo:b', {'foo': 'uri:c'}))
self.assertEquals(
[root[0]],
- root.xpath('//baz:b', {'baz': 'uri:a'}))
+ root.xpath('.//baz:b', {'baz': 'uri:a'}))
self.assertRaises(
TypeError,
- root.xpath, '//b', {None: 'uri:a'})
+ root.xpath, './/b', {None: 'uri:a'})
def test_xpath_error(self):
tree = self.parse(' ')
self.assertRaises(SyntaxError, tree.xpath, '\\fad')
+ def test_elementtree_getpath(self):
+ a = etree.Element("a")
+ b = etree.SubElement(a, "b")
+ c = etree.SubElement(a, "c")
+ d1 = etree.SubElement(c, "d")
+ d2 = etree.SubElement(c, "d")
+
+ tree = etree.ElementTree(a)
+ self.assertEqual('/a/c/d',
+ tree.getpath(d2)[:6])
+ self.assertEqual([d2],
+ tree.xpath(tree.getpath(d2)))
+
+ def test_elementtree_getpath_partial(self):
+ a = etree.Element("a")
+ b = etree.SubElement(a, "b")
+ c = etree.SubElement(a, "c")
+ d1 = etree.SubElement(c, "d")
+ d2 = etree.SubElement(c, "d")
+
+ tree = etree.ElementTree(c)
+ self.assertEqual('/c/d',
+ tree.getpath(d2)[:4])
+ self.assertEqual([d2],
+ tree.xpath(tree.getpath(d2)))
+
def test_xpath_evaluator(self):
tree = self.parse(' ')
e = etree.XPathEvaluator(tree)
Modified: lxml/trunk/src/lxml/xmlid.pxi
==============================================================================
--- lxml/trunk/src/lxml/xmlid.pxi (original)
+++ lxml/trunk/src/lxml/xmlid.pxi Fri May 26 15:35:02 2006
@@ -8,7 +8,7 @@
root = XML(text)
# ElementTree compatible implementation: look for 'id' attributes
dic = {}
- for elem in root.xpath('//*[string(@id)]'):
+ for elem in ElementTree(root).xpath('//*[string(@id)]'):
python.PyDict_SetItem(dic, elem.get('id'), elem)
return (root, dic)
Modified: lxml/trunk/src/lxml/xpath.pxi
==============================================================================
--- lxml/trunk/src/lxml/xpath.pxi (original)
+++ lxml/trunk/src/lxml/xpath.pxi Fri May 26 15:35:02 2006
@@ -88,6 +88,9 @@
cdef class XPathElementEvaluator(XPathEvaluatorBase):
"""Create an XPath evaluator for an element.
+ Note that the result of evaluating absolute XPath expressions (starting
+ with '/') is undefined for Elements. Use an ElementTree instead.
+
XPath evaluators must not be shared between threads.
"""
cdef _Element _element
@@ -114,25 +117,33 @@
add = self._context.addNamespace
for prefix, uri in namespaces.items():
add(prefix, uri)
-
+
def evaluate(self, _path, **_variables):
- """Evaluate an XPath expression on the document. Variables may be
- provided as keyword arguments. Note that namespaces are currently not
- supported for variables."""
+ """Evaluate an XPath expression on the document.
+
+ Variables may be provided as keyword arguments. Note that namespaces
+ are currently not supported for variables.
+
+ The result of evaluating absolute XPath expressions (starting with
+ '/') is undefined for Elements. Use an ElementTree instead.
+ """
cdef xpath.xmlXPathContext* xpathCtxt
cdef xpath.xmlXPathObject* xpathObj
cdef xmlNode* c_node
cdef _Document doc
+ path = _utf8(_path)
+ if path.lstrip().startswith('/'):
+ raise LxmlSyntaxError, "cannot use absolute path on element"
xpathCtxt = self._xpathCtxt
xpathCtxt.node = self._element._c_node
doc = self._element._doc
self._context.register_context(xpathCtxt, doc)
- self._context.registerVariables(_variables)
-
- path = _utf8(_path)
- xpathObj = xpath.xmlXPathEvalExpression(_cstr(path), xpathCtxt)
- self._context.unregister_context()
+ try:
+ self._context.registerVariables(_variables)
+ xpathObj = xpath.xmlXPathEvalExpression(_cstr(path), xpathCtxt)
+ finally:
+ self._context.unregister_context()
return self._handle_result(xpathObj, doc)
@@ -146,9 +157,39 @@
XPathElementEvaluator.__init__(
self, etree._context_node, namespaces, extensions)
+ def evaluate(self, _path, **_variables):
+ """Evaluate an XPath expression on the document.
+
+ Variables may be provided as keyword arguments. Note that namespaces
+ are currently not supported for variables.
+ """
+ cdef xpath.xmlXPathContext* xpathCtxt
+ cdef xpath.xmlXPathObject* xpathObj
+ cdef xmlNode* c_node
+ cdef xmlDoc* c_doc
+ cdef _Document doc
+ path = _utf8(_path)
+ xpathCtxt = self._xpathCtxt
+ doc = self._element._doc
+
+ self._context.register_context(xpathCtxt, doc)
+ c_doc = _fakeRootDoc(doc._c_doc, self._element._c_node)
+ try:
+ self._context.registerVariables(_variables)
+ xpathCtxt.doc = c_doc
+ xpathCtxt.node = tree.xmlDocGetRootElement(c_doc)
+ xpathObj = xpath.xmlXPathEvalExpression(_cstr(path), xpathCtxt)
+ finally:
+ _destroyFakeDoc(doc._c_doc, c_doc)
+ self._context.unregister_context()
+
+ return self._handle_result(xpathObj, doc)
+
def XPathEvaluator(etree_or_element, namespaces=None, extensions=None):
- """Creates and XPath evaluator for an ElementTree or an Element.
+ """Creates and XPath evaluator for an ElementTree or an Element. Note
+ that the result of absolute XPath expressions (starting with '/') is
+ undefined for Elements. Use an ElementTree instead.
XPath evaluators must not be shared between threads.
"""
@@ -161,11 +202,14 @@
cdef class XPath(XPathEvaluatorBase):
cdef xpath.xmlXPathCompExpr* _xpath
cdef readonly object path
+ cdef int _absolute
def __init__(self, path, namespaces=None, extensions=None):
XPathEvaluatorBase.__init__(self, namespaces, extensions, None)
+ self._xpath = NULL
self.path = path
path = _utf8(path)
+ self._absolute = path.lstrip().startswith('/')
self._xpath = xpath.xmlXPathCompile(_cstr(path))
if self._xpath is NULL:
self._raise_parse_error()
@@ -180,18 +224,21 @@
document = _documentOrRaise(_etree_or_element)
element = _rootNodeOrRaise(_etree_or_element)
+
+ if self._absolute and element is _etree_or_element:
+ raise ValueError, "cannot use absolute path on element"
xpathCtxt = self._xpathCtxt
xpathCtxt.doc = document._c_doc
xpathCtxt.node = element._c_node
context = self._context
- context._release_temp_refs()
context.register_context(xpathCtxt, document)
- context.registerVariables(_variables)
-
- xpathObj = xpath.xmlXPathCompiledEval(self._xpath, xpathCtxt)
- context.unregister_context()
+ try:
+ context.registerVariables(_variables)
+ xpathObj = xpath.xmlXPathCompiledEval(self._xpath, xpathCtxt)
+ finally:
+ context.unregister_context()
return self._handle_result(xpathObj, document)
def evaluate(self, _tree, **_variables):
From scoder at codespeak.net Fri May 26 16:13:40 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Fri, 26 May 2006 16:13:40 +0200 (CEST)
Subject: [Lxml-checkins] r27727 - lxml/trunk/doc
Message-ID: <20060526141340.139891007C@code0.codespeak.net>
Author: scoder
Date: Fri May 26 16:13:38 2006
New Revision: 27727
Modified:
lxml/trunk/doc/api.txt
Log:
doc clarification on using absolute vs. relative expressions on ElementTree and Element
Modified: lxml/trunk/doc/api.txt
==============================================================================
--- lxml/trunk/doc/api.txt (original)
+++ lxml/trunk/doc/api.txt Fri May 26 16:13:38 2006
@@ -192,14 +192,24 @@
xpath method on ElementTree, Element
------------------------------------
-lxml.etree extends the ElementTree and Element interfaces with an xpath
-method. For ElementTree, the xpath method performs a global xpath query
-against the document. When xpath is used on an element, the xpath expression
-is performed taking the element as the xpath context node.
+lxml.etree supports the simple path syntax of the ``findall()`` etc. methods
+on ElementTree and Element, as known from the original ElementTree library.
+As an extension, these classes also provide an ``xpath()`` method that
+supports expressions in the complete XPath syntax.
+
+For ElementTree, the xpath method performs a global xpath query against the
+document (if absolute) or against the root node (if relative).
+
+When xpath is used on an element, the xpath expression is performed taking the
+element as the xpath context node. Note that it is illegal to run an absolute
+XPath expression (like ``/a``) against an element. The result is undefined.
You call the xpath() method with the XPath expression to use. Optionally, you
can provide a second argument, which should be a dictionary mapping the
-namespace prefixes used in the XPath expression to namespace URIs.
+namespace prefixes used in the XPath expression to namespace URIs. The
+optional third argument is used to define `extension functions`_.
+
+.. _`extension functions`: extensions.html
The return values of xpath vary, depending on the XPath expression used:
From scoder at codespeak.net Fri May 26 16:13:50 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Fri, 26 May 2006 16:13:50 +0200 (CEST)
Subject: [Lxml-checkins] r27728 - lxml/trunk/src/lxml
Message-ID: <20060526141350.4B5E81007C@code0.codespeak.net>
Author: scoder
Date: Fri May 26 16:13:49 2006
New Revision: 27728
Modified:
lxml/trunk/src/lxml/xpath.pxi
Log:
cleanup
Modified: lxml/trunk/src/lxml/xpath.pxi
==============================================================================
--- lxml/trunk/src/lxml/xpath.pxi (original)
+++ lxml/trunk/src/lxml/xpath.pxi Fri May 26 16:13:49 2006
@@ -129,7 +129,6 @@
"""
cdef xpath.xmlXPathContext* xpathCtxt
cdef xpath.xmlXPathObject* xpathObj
- cdef xmlNode* c_node
cdef _Document doc
path = _utf8(_path)
if path.lstrip().startswith('/'):
@@ -165,7 +164,6 @@
"""
cdef xpath.xmlXPathContext* xpathCtxt
cdef xpath.xmlXPathObject* xpathObj
- cdef xmlNode* c_node
cdef xmlDoc* c_doc
cdef _Document doc
path = _utf8(_path)
@@ -209,10 +207,10 @@
self._xpath = NULL
self.path = path
path = _utf8(path)
- self._absolute = path.lstrip().startswith('/')
self._xpath = xpath.xmlXPathCompile(_cstr(path))
if self._xpath is NULL:
self._raise_parse_error()
+ self._absolute = path.lstrip().startswith('/')
self._xpathCtxt = xpath.xmlXPathNewContext(NULL)
def __call__(self, _etree_or_element, **_variables):
From scoder at codespeak.net Fri May 26 18:25:04 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Fri, 26 May 2006 18:25:04 +0200 (CEST)
Subject: [Lxml-checkins] r27731 - in lxml/trunk: . src/lxml
Message-ID: <20060526162504.54BED10088@code0.codespeak.net>
Author: scoder
Date: Fri May 26 18:25:01 2006
New Revision: 27731
Modified:
lxml/trunk/CHANGES.txt
lxml/trunk/src/lxml/apihelpers.pxi
lxml/trunk/src/lxml/etree.pyx
lxml/trunk/src/lxml/xslt.pxi
Log:
fix: document reference in ElementTree objects was not updated when their root element was moved to a different document
Modified: lxml/trunk/CHANGES.txt
==============================================================================
--- lxml/trunk/CHANGES.txt (original)
+++ lxml/trunk/CHANGES.txt Fri May 26 18:25:01 2006
@@ -17,6 +17,9 @@
Bugs fixed
----------
+* Document reference in ElementTree objects was not updated when the root
+ element was moved to a different document
+
* Running absolute XPath expressions on Elements now raises an exception in
most cases. Otherwise, the behaviour is explicitly marked as undefined.
Modified: lxml/trunk/src/lxml/apihelpers.pxi
==============================================================================
--- lxml/trunk/src/lxml/apihelpers.pxi (original)
+++ lxml/trunk/src/lxml/apihelpers.pxi Fri May 26 18:25:01 2006
@@ -16,8 +16,11 @@
Should be used in all API functions for consistency.
"""
cdef _Document doc
+ cdef _NodeBase element
if isinstance(input, _ElementTree):
- doc = (<_ElementTree>input)._doc
+ element = (<_ElementTree>input)._context_node
+ if element is not None:
+ doc = element._doc
elif isinstance(input, _NodeBase):
doc = (<_NodeBase>input)._doc
elif isinstance(input, _Document):
@@ -53,14 +56,16 @@
# call this to get the document of a
# _Document, _ElementTree or _NodeBase object
# may return None!
+ cdef _NodeBase element
if isinstance(input, _ElementTree):
- return (<_ElementTree>input)._doc
+ element = (<_ElementTree>input)._context_node
+ if element is not None:
+ return element._doc
elif isinstance(input, _NodeBase):
return (<_NodeBase>input)._doc
elif isinstance(input, _Document):
return <_Document>input
- else:
- return None
+ return None
cdef _NodeBase _rootNodeOf(object input):
# call this to get the root node of a
Modified: lxml/trunk/src/lxml/etree.pyx
==============================================================================
--- lxml/trunk/src/lxml/etree.pyx (original)
+++ lxml/trunk/src/lxml/etree.pyx Fri May 26 18:25:01 2006
@@ -2,6 +2,7 @@
from tree cimport xmlDoc, xmlNode, xmlAttr, xmlNs, _isElement
from python cimport isinstance, issubclass, hasattr, callable
from python cimport iter, str, _cstr, Py_ssize_t
+cimport xpath
cimport xinclude
cimport c14n
cimport cstd
@@ -339,6 +340,10 @@
cdef _Document _doc
cdef _NodeBase _context_node
+ # Note that _doc is only used to store the original document if we do not
+ # have a _context_node. All methods should prefer self._context_node._doc
+ # to honour tree restructuring
+
cdef _assertHasRoot(self):
"""We have to take care here: the document may not have a root node!
This can happen if ElementTree() is called without any argument and
@@ -351,10 +356,11 @@
def parse(self, source, _BaseParser parser=None):
"""Updates self with the content of source and returns its root
"""
- self._doc = _parseDocument(source, parser)
- self._context_node = self._doc.getroot()
+ cdef _Document doc
+ doc = _parseDocument(source, parser)
+ self._context_node = doc.getroot()
return self._context_node
-
+
def getroot(self):
return self._context_node
@@ -364,7 +370,8 @@
of a parsed document (e.g. those returned by the parse functions).
"""
def __get__(self):
- return DocInfo(self._doc)
+ self._assertHasRoot()
+ return DocInfo(self._context_node._doc)
def write(self, file, encoding=None,
pretty_print=False, xml_declaration=None):
@@ -390,13 +397,15 @@
c_write_declaration, bool(pretty_print))
def getpath(self, _NodeBase element not None):
+ cdef _Document doc
cdef xmlDoc* c_doc
cdef char* c_path
- if element._doc is not self._doc:
+ doc = self._context_node._doc
+ if element._doc is not doc:
raise ValueError, "Element is not in this tree."
- c_doc = _fakeRootDoc(self._doc._c_doc, self._context_node._c_node)
+ c_doc = _fakeRootDoc(doc._c_doc, self._context_node._c_node)
c_path = tree.xmlGetNodePath(element._c_node)
- _destroyFakeDoc(self._doc._c_doc, c_doc)
+ _destroyFakeDoc(doc._c_doc, c_doc)
if c_path is NULL:
raise LxmlError, "Error creating node path."
path = c_path
@@ -521,7 +530,7 @@
cdef char* data
cdef int bytes
self._assertHasRoot()
- c_base_doc = self._doc._c_doc
+ c_base_doc = self._context_node._doc._c_doc
c_doc = _fakeRootDoc(c_base_doc, self._context_node._c_node)
bytes = c14n.xmlC14NDocDumpMemory(c_doc, NULL, 0, NULL, 1, &data)
Modified: lxml/trunk/src/lxml/xslt.pxi
==============================================================================
--- lxml/trunk/src/lxml/xslt.pxi (original)
+++ lxml/trunk/src/lxml/xslt.pxi Fri May 26 18:25:01 2006
@@ -375,9 +375,16 @@
cdef class _XSLTResultTree(_ElementTree):
cdef XSLT _xslt
cdef _saveToStringAndSize(self, char** s, int* l):
+ cdef _Document doc
cdef int r
- r = xslt.xsltSaveResultToString(s, l, self._doc._c_doc,
- self._xslt._c_style)
+ if self._context_node is not None:
+ doc = self._context_node._doc
+ if doc is None:
+ doc = self._doc
+ if doc is None:
+ s[0] = NULL
+ return
+ r = xslt.xsltSaveResultToString(s, l, doc._c_doc, self._xslt._c_style)
if r == -1:
raise XSLTSaveError, "Error saving XSLT result to string"
From scoder at codespeak.net Fri May 26 18:27:36 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Fri, 26 May 2006 18:27:36 +0200 (CEST)
Subject: [Lxml-checkins] r27732 - lxml/trunk/src/lxml
Message-ID: <20060526162736.A3C0010088@code0.codespeak.net>
Author: scoder
Date: Fri May 26 18:27:33 2006
New Revision: 27732
Modified:
lxml/trunk/src/lxml/extensions.pxi
lxml/trunk/src/lxml/parser.pxi
lxml/trunk/src/lxml/tree.pxd
lxml/trunk/src/lxml/xpath.pxd
lxml/trunk/src/lxml/xpath.pxi
Log:
reuse the parser dictionary also for XPath parsing
Modified: lxml/trunk/src/lxml/extensions.pxi
==============================================================================
--- lxml/trunk/src/lxml/extensions.pxi (original)
+++ lxml/trunk/src/lxml/extensions.pxi Fri May 26 18:27:33 2006
@@ -1,7 +1,5 @@
# supports for extension functions in XPath and XSLT
-cimport xpath
-
class XPathError(LxmlError):
pass
Modified: lxml/trunk/src/lxml/parser.pxi
==============================================================================
--- lxml/trunk/src/lxml/parser.pxi (original)
+++ lxml/trunk/src/lxml/parser.pxi Fri May 26 18:27:33 2006
@@ -42,6 +42,15 @@
pctxt.dict = self._c_dict
xmlparser.xmlDictReference(pctxt.dict)
+ cdef void _initXPathParserDict(self, xpath.xmlXPathContext* pctxt):
+ "Assure we always use the same string dictionary."
+ if self._c_dict is NULL or self._c_dict is pctxt.dict:
+ return
+ if pctxt.dict is not NULL:
+ xmlparser.xmlDictFree(pctxt.dict)
+ pctxt.dict = self._c_dict
+ xmlparser.xmlDictReference(pctxt.dict)
+
cdef void _initDocDict(self, xmlDoc* result):
"Store dict of last object parsed if no shared dict yet"
if result is NULL:
Modified: lxml/trunk/src/lxml/tree.pxd
==============================================================================
--- lxml/trunk/src/lxml/tree.pxd (original)
+++ lxml/trunk/src/lxml/tree.pxd Fri May 26 18:27:33 2006
@@ -52,7 +52,6 @@
# for some reason need to define this in this section;
# libxml/dict.h appears to be broken to include in C
ctypedef struct xmlDict
- cdef int xmlDictOwns(xmlDict* dict, char* name)
ctypedef struct xmlDoc
ctypedef struct xmlAttr
Modified: lxml/trunk/src/lxml/xpath.pxd
==============================================================================
--- lxml/trunk/src/lxml/xpath.pxd (original)
+++ lxml/trunk/src/lxml/xpath.pxd Fri May 26 18:27:33 2006
@@ -54,6 +54,7 @@
ctypedef struct xmlXPathContext:
tree.xmlDoc* doc
tree.xmlNode* node
+ tree.xmlDict* dict
char* function
char* functionURI
# actually signature is void (*error)(void*, xmlError*)
@@ -81,6 +82,8 @@
cdef xmlXPathObject* xmlXPathCompiledEval(xmlXPathCompExpr* comp,
xmlXPathContext* ctxt)
cdef xmlXPathCompExpr* xmlXPathCompile(char* str)
+ cdef xmlXPathCompExpr* xmlXPathCtxtCompile(xmlXPathContext* ctxt,
+ char* str)
cdef void xmlXPathFreeContext(xmlXPathContext* ctxt)
cdef void xmlXPathFreeCompExpr(xmlXPathCompExpr* comp)
cdef void xmlXPathFreeObject(xmlXPathObject* obj)
Modified: lxml/trunk/src/lxml/xpath.pxi
==============================================================================
--- lxml/trunk/src/lxml/xpath.pxi (original)
+++ lxml/trunk/src/lxml/xpath.pxi Fri May 26 18:27:33 2006
@@ -44,6 +44,8 @@
xpath.xmlXPathRegisterVariable(
self._xpathCtxt, _cstr(name_utf), _wrapXPathObject(value))
+cdef void _setupDict(xpath.xmlXPathContext* xpathCtxt):
+ __GLOBAL_PARSER_CONTEXT._initXPathParserDict(xpathCtxt)
cdef class XPathEvaluatorBase:
cdef xpath.xmlXPathContext* _xpathCtxt
@@ -103,6 +105,7 @@
self._xpathCtxt = xpathCtxt
if xpathCtxt is NULL:
raise XPathContextError, "Unable to create new XPath context"
+ _setupDict(xpathCtxt)
self._element = element
XPathEvaluatorBase.__init__(self, namespaces, extensions)
@@ -207,11 +210,12 @@
self._xpath = NULL
self.path = path
path = _utf8(path)
- self._xpath = xpath.xmlXPathCompile(_cstr(path))
+ self._xpathCtxt = xpath.xmlXPathNewContext(NULL)
+ _setupDict(self._xpathCtxt)
+ self._xpath = xpath.xmlXPathCtxtCompile(self._xpathCtxt, _cstr(path))
if self._xpath is NULL:
self._raise_parse_error()
self._absolute = path.lstrip().startswith('/')
- self._xpathCtxt = xpath.xmlXPathNewContext(NULL)
def __call__(self, _etree_or_element, **_variables):
cdef xpath.xmlXPathContext* xpathCtxt
From scoder at codespeak.net Fri May 26 19:03:05 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Fri, 26 May 2006 19:03:05 +0200 (CEST)
Subject: [Lxml-checkins] r27733 - lxml/trunk/src/lxml
Message-ID: <20060526170305.C5A7210088@code0.codespeak.net>
Author: scoder
Date: Fri May 26 19:03:04 2006
New Revision: 27733
Modified:
lxml/trunk/src/lxml/xpath.pxi
Log:
factored out test for absolute paths into evaluator base class method
Modified: lxml/trunk/src/lxml/xpath.pxi
==============================================================================
--- lxml/trunk/src/lxml/xpath.pxi (original)
+++ lxml/trunk/src/lxml/xpath.pxi Fri May 26 19:03:04 2006
@@ -58,6 +58,20 @@
if self._xpathCtxt is not NULL:
xpath.xmlXPathFreeContext(self._xpathCtxt)
+ cdef int _checkAbsolutePath(self, char* path):
+ cdef char c
+ if path is NULL:
+ return 0
+ c = path[0]
+ while c != c'\0':
+ if c == c'/':
+ return 1
+ elif c != c' ' and c != c'\t':
+ break
+ path = path + 1
+ c = path[0]
+ return 0
+
cdef _raise_parse_error(self):
if self._xpathCtxt is not NULL and \
self._xpathCtxt.lastError.message is not NULL:
@@ -133,8 +147,10 @@
cdef xpath.xmlXPathContext* xpathCtxt
cdef xpath.xmlXPathObject* xpathObj
cdef _Document doc
+ cdef char* c_path
path = _utf8(_path)
- if path.lstrip().startswith('/'):
+ c_path = _cstr(path)
+ if self._checkAbsolutePath(c_path):
raise LxmlSyntaxError, "cannot use absolute path on element"
xpathCtxt = self._xpathCtxt
xpathCtxt.node = self._element._c_node
@@ -143,7 +159,7 @@
self._context.register_context(xpathCtxt, doc)
try:
self._context.registerVariables(_variables)
- xpathObj = xpath.xmlXPathEvalExpression(_cstr(path), xpathCtxt)
+ xpathObj = xpath.xmlXPathEvalExpression(c_path, xpathCtxt)
finally:
self._context.unregister_context()
@@ -206,16 +222,18 @@
cdef int _absolute
def __init__(self, path, namespaces=None, extensions=None):
+ cdef char* c_path
XPathEvaluatorBase.__init__(self, namespaces, extensions, None)
self._xpath = NULL
self.path = path
path = _utf8(path)
+ c_path = _cstr(path)
self._xpathCtxt = xpath.xmlXPathNewContext(NULL)
_setupDict(self._xpathCtxt)
- self._xpath = xpath.xmlXPathCtxtCompile(self._xpathCtxt, _cstr(path))
+ self._xpath = xpath.xmlXPathCtxtCompile(self._xpathCtxt, c_path)
if self._xpath is NULL:
self._raise_parse_error()
- self._absolute = path.lstrip().startswith('/')
+ self._absolute = self._checkAbsolutePath(c_path)
def __call__(self, _etree_or_element, **_variables):
cdef xpath.xmlXPathContext* xpathCtxt
From scoder at codespeak.net Fri May 26 19:07:19 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Fri, 26 May 2006 19:07:19 +0200 (CEST)
Subject: [Lxml-checkins] r27734 - lxml/trunk/src/lxml
Message-ID: <20060526170719.DA54610088@code0.codespeak.net>
Author: scoder
Date: Fri May 26 19:07:18 2006
New Revision: 27734
Modified:
lxml/trunk/src/lxml/xpath.pxi
Log:
cleanup
Modified: lxml/trunk/src/lxml/xpath.pxi
==============================================================================
--- lxml/trunk/src/lxml/xpath.pxi (original)
+++ lxml/trunk/src/lxml/xpath.pxi Fri May 26 19:07:18 2006
@@ -63,14 +63,10 @@
if path is NULL:
return 0
c = path[0]
- while c != c'\0':
- if c == c'/':
- return 1
- elif c != c' ' and c != c'\t':
- break
+ while c == c' ' or c == c'\t':
path = path + 1
c = path[0]
- return 0
+ return c == c'/'
cdef _raise_parse_error(self):
if self._xpathCtxt is not NULL and \
From scoder at codespeak.net Fri May 26 19:26:50 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Fri, 26 May 2006 19:26:50 +0200 (CEST)
Subject: [Lxml-checkins] r27736 - lxml/trunk/src/lxml/tests
Message-ID: <20060526172650.47C2E10088@code0.codespeak.net>
Author: scoder
Date: Fri May 26 19:26:48 2006
New Revision: 27736
Modified:
lxml/trunk/src/lxml/tests/test_xpathevaluator.py
Log:
test cases for exceptions on calling absolute XPath expression on an Element
Modified: lxml/trunk/src/lxml/tests/test_xpathevaluator.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_xpathevaluator.py (original)
+++ lxml/trunk/src/lxml/tests/test_xpathevaluator.py Fri May 26 19:26:48 2006
@@ -329,6 +329,13 @@
def test_xpath_elementtree_error(self):
self.assertRaises(ValueError, etree.XPath('*'), etree.ElementTree())
+ def test_xpath_element_absolute_error(self):
+ self.assertRaises(ValueError, etree.XPath(' / * '), etree.Element("test"))
+
+ def test_xpath_element_absolute_error2(self):
+ el = etree.Element("test")
+ self.assertRaises(SyntaxError, el.xpath, ' /* ')
+
class ETreeETXPathClassTestCase(HelperTestCase):
"Tests for the ETXPath class"
def test_xpath_compile_ns(self):
From scoder at codespeak.net Fri May 26 19:39:54 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Fri, 26 May 2006 19:39:54 +0200 (CEST)
Subject: [Lxml-checkins] r27737 - lxml/trunk/src/lxml
Message-ID: <20060526173954.8E9881008D@code0.codespeak.net>
Author: scoder
Date: Fri May 26 19:39:53 2006
New Revision: 27737
Modified:
lxml/trunk/src/lxml/xpath.pxi
Log:
cleanup
Modified: lxml/trunk/src/lxml/xpath.pxi
==============================================================================
--- lxml/trunk/src/lxml/xpath.pxi (original)
+++ lxml/trunk/src/lxml/xpath.pxi Fri May 26 19:39:53 2006
@@ -280,7 +280,7 @@
cdef _nsextract_path(self, path):
# replace {namespaces} by new prefixes
cdef int i
- path_utf = path.encode('UTF-8')
+ path_utf = _utf8(path)
stripped_path = _replace_strings('', path_utf) # remove string literals
namespaces = {}
namespace_defs = []
From scoder at codespeak.net Fri May 26 21:37:48 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Fri, 26 May 2006 21:37:48 +0200 (CEST)
Subject: [Lxml-checkins] r27741 - in lxml/trunk: . doc src/lxml
src/lxml/tests
Message-ID: <20060526193748.E34A010097@code0.codespeak.net>
Author: scoder
Date: Fri May 26 21:37:45 2006
New Revision: 27741
Modified:
lxml/trunk/CHANGES.txt
lxml/trunk/doc/api.txt
lxml/trunk/src/lxml/etree.pyx
lxml/trunk/src/lxml/tests/test_etree.py
lxml/trunk/src/lxml/tests/test_xpathevaluator.py
lxml/trunk/src/lxml/xpath.pxi
Log:
new method Element.getroottree() to return root ElementTree of the document, make absolute XPaths available to elements again, define element.xpath('/...') as element.getroottree().xpath('/...')
Modified: lxml/trunk/CHANGES.txt
==============================================================================
--- lxml/trunk/CHANGES.txt (original)
+++ lxml/trunk/CHANGES.txt Fri May 26 21:37:45 2006
@@ -7,6 +7,9 @@
Features added
--------------
+* ``element.getroottree()`` returns an ElementTree for the root node of the
+ document that contains the element.
+
* ElementTree.getpath(element) returns a simple, absolute XPath expression to
find the element in the tree structure
@@ -20,8 +23,8 @@
* Document reference in ElementTree objects was not updated when the root
element was moved to a different document
-* Running absolute XPath expressions on Elements now raises an exception in
- most cases. Otherwise, the behaviour is explicitly marked as undefined.
+* Running absolute XPath expressions on an Elements now correctly evaluates
+ against the root tree
* Evaluating absolute XPath expressions ('/*') on an ElementTree could fail
Modified: lxml/trunk/doc/api.txt
==============================================================================
--- lxml/trunk/doc/api.txt (original)
+++ lxml/trunk/doc/api.txt Fri May 26 21:37:45 2006
@@ -197,17 +197,44 @@
As an extension, these classes also provide an ``xpath()`` method that
supports expressions in the complete XPath syntax.
-For ElementTree, the xpath method performs a global xpath query against the
-document (if absolute) or against the root node (if relative).
+For ElementTree, the xpath method performs a global XPath query against the
+document (if absolute) or against the root node (if relative)::
-When xpath is used on an element, the xpath expression is performed taking the
-element as the xpath context node. Note that it is illegal to run an absolute
-XPath expression (like ``/a``) against an element. The result is undefined.
-
-You call the xpath() method with the XPath expression to use. Optionally, you
-can provide a second argument, which should be a dictionary mapping the
-namespace prefixes used in the XPath expression to namespace URIs. The
-optional third argument is used to define `extension functions`_.
+ >>> f = StringIO('<foo><bar></bar></foo>')
+ >>> tree = etree.parse(f)
+
+ >>> r = tree.xpath('/foo/bar')
+ >>> len(r)
+ 1
+ >>> r[0].tag
+ 'bar'
+
+ >>> r = tree.xpath('bar')
+ >>> r[0].tag
+ 'bar'
+
+When ``xpath()`` is used on an element, the XPath expression is evaluated
+against the element (if relative) or against the root tree (if absolute)::
+
+ >>> root = tree.getroot()
+ >>> r = root.xpath('bar')
+ >>> r[0].tag
+ 'bar'
+
+ >>> bar = root[0]
+ >>> r = bar.xpath('/foo/bar')
+ >>> r[0].tag
+ 'bar'
+
+ >>> tree = bar.getroottree()
+ >>> r = tree.xpath('/foo/bar')
+ >>> r[0].tag
+ 'bar'
+
+Optionally, you can provide a ``namespaces`` keyword argument, which should be
+a dictionary mapping the namespace prefixes used in the XPath expression to
+namespace URIs. The optional ``extensions`` argument is used to define
+`extension functions`_ in Python.
.. _`extension functions`: extensions.html
Modified: lxml/trunk/src/lxml/etree.pyx
==============================================================================
--- lxml/trunk/src/lxml/etree.pyx (original)
+++ lxml/trunk/src/lxml/etree.pyx Fri May 26 21:37:45 2006
@@ -440,11 +440,12 @@
return root.findall(path)
# extensions to ElementTree API
- def xpath(self, _path, namespaces=None, **_variables):
+ def xpath(self, _path, namespaces=None, extensions=None, **_variables):
"""XPath evaluate in context of document.
- namespaces is an optional dictionary with prefix to namespace URI
- mappings, used by XPath.
+ ``namespaces`` is an optional dictionary with prefix to namespace URI
+ mappings, used by XPath. ``extensions`` defines additional extension
+ functions.
Returns a list (nodeset), or bool, float or string.
@@ -456,7 +457,7 @@
XPathEvaluator directly.
"""
self._assertHasRoot()
- evaluator = XPathDocumentEvaluator(self, namespaces)
+ evaluator = XPathDocumentEvaluator(self, namespaces, extensions)
return evaluator.evaluate(_path, **_variables)
def xslt(self, _xslt, extensions=None, **_kw):
@@ -923,6 +924,11 @@
return _elementFactory(self._doc, c_node)
return None
+ def getroottree(self):
+ """Return an ElementTree for the root node of the document that
+ contains this element."""
+ return _elementTreeFactory(self._doc, None)
+
def getiterator(self, tag=None):
return ElementDepthFirstIterator(self, tag)
@@ -950,8 +956,8 @@
def findall(self, path):
return _elementpath.findall(self, path)
- def xpath(self, _path, namespaces=None, **_variables):
- evaluator = XPathElementEvaluator(self, namespaces)
+ def xpath(self, _path, namespaces=None, extensions=None, **_variables):
+ evaluator = XPathElementEvaluator(self, namespaces, extensions)
return evaluator.evaluate(_path, **_variables)
cdef _Element _elementFactory(_Document doc, xmlNode* c_node):
Modified: lxml/trunk/src/lxml/tests/test_etree.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_etree.py (original)
+++ lxml/trunk/src/lxml/tests/test_etree.py Fri May 26 21:37:45 2006
@@ -264,6 +264,24 @@
b,
d.getparent())
+ def test_getroottree(self):
+ Element = self.etree.Element
+ SubElement = self.etree.SubElement
+
+ a = Element('a')
+ b = SubElement(a, 'b')
+ c = SubElement(a, 'c')
+ d = SubElement(b, 'd')
+ self.assertEquals(
+ a,
+ a.getroottree().getroot())
+ self.assertEquals(
+ a,
+ b.getroottree().getroot())
+ self.assertEquals(
+ a,
+ d.getroottree().getroot())
+
def test_parseid(self):
parseid = self.etree.parseid
XML = self.etree.XML
Modified: lxml/trunk/src/lxml/tests/test_xpathevaluator.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_xpathevaluator.py (original)
+++ lxml/trunk/src/lxml/tests/test_xpathevaluator.py Fri May 26 21:37:45 2006
@@ -74,8 +74,8 @@
c = root[0]
self.assertEquals([c[0], c[1]],
c.xpath('b'))
- self.assertEquals([c[0], c[1]],
- c.xpath('.//b'))
+ self.assertEquals([c[0], c[1], root[1][0]],
+ c.xpath('//b'))
def test_xpath_ns(self):
tree = self.parse(' ')
@@ -88,10 +88,10 @@
tree.xpath('//foo:b', {'foo': 'uri:c'}))
self.assertEquals(
[root[0]],
- root.xpath('.//baz:b', {'baz': 'uri:a'}))
+ root.xpath('//baz:b', {'baz': 'uri:a'}))
self.assertRaises(
TypeError,
- root.xpath, './/b', {None: 'uri:a'})
+ root.xpath, '//b', {None: 'uri:a'})
def test_xpath_error(self):
tree = self.parse(' ')
@@ -329,13 +329,6 @@
def test_xpath_elementtree_error(self):
self.assertRaises(ValueError, etree.XPath('*'), etree.ElementTree())
- def test_xpath_element_absolute_error(self):
- self.assertRaises(ValueError, etree.XPath(' / * '), etree.Element("test"))
-
- def test_xpath_element_absolute_error2(self):
- el = etree.Element("test")
- self.assertRaises(SyntaxError, el.xpath, ' /* ')
-
class ETreeETXPathClassTestCase(HelperTestCase):
"Tests for the ETXPath class"
def test_xpath_compile_ns(self):
Modified: lxml/trunk/src/lxml/xpath.pxi
==============================================================================
--- lxml/trunk/src/lxml/xpath.pxi (original)
+++ lxml/trunk/src/lxml/xpath.pxi Fri May 26 21:37:45 2006
@@ -100,8 +100,8 @@
cdef class XPathElementEvaluator(XPathEvaluatorBase):
"""Create an XPath evaluator for an element.
- Note that the result of evaluating absolute XPath expressions (starting
- with '/') is undefined for Elements. Use an ElementTree instead.
+ Absolute XPath expressions (starting with '/') will be evaluated against
+ the ElementTree as returned by getroottree().
XPath evaluators must not be shared between threads.
"""
@@ -137,17 +137,14 @@
Variables may be provided as keyword arguments. Note that namespaces
are currently not supported for variables.
- The result of evaluating absolute XPath expressions (starting with
- '/') is undefined for Elements. Use an ElementTree instead.
+ Absolute XPath expressions (starting with '/') will be evaluated
+ against the ElementTree as returned by getroottree().
"""
cdef xpath.xmlXPathContext* xpathCtxt
cdef xpath.xmlXPathObject* xpathObj
cdef _Document doc
cdef char* c_path
path = _utf8(_path)
- c_path = _cstr(path)
- if self._checkAbsolutePath(c_path):
- raise LxmlSyntaxError, "cannot use absolute path on element"
xpathCtxt = self._xpathCtxt
xpathCtxt.node = self._element._c_node
doc = self._element._doc
@@ -155,7 +152,7 @@
self._context.register_context(xpathCtxt, doc)
try:
self._context.registerVariables(_variables)
- xpathObj = xpath.xmlXPathEvalExpression(c_path, xpathCtxt)
+ xpathObj = xpath.xmlXPathEvalExpression(_cstr(path), xpathCtxt)
finally:
self._context.unregister_context()
@@ -200,9 +197,7 @@
def XPathEvaluator(etree_or_element, namespaces=None, extensions=None):
- """Creates and XPath evaluator for an ElementTree or an Element. Note
- that the result of absolute XPath expressions (starting with '/') is
- undefined for Elements. Use an ElementTree instead.
+ """Creates and XPath evaluator for an ElementTree or an Element.
XPath evaluators must not be shared between threads.
"""
@@ -215,7 +210,6 @@
cdef class XPath(XPathEvaluatorBase):
cdef xpath.xmlXPathCompExpr* _xpath
cdef readonly object path
- cdef int _absolute
def __init__(self, path, namespaces=None, extensions=None):
cdef char* c_path
@@ -229,7 +223,6 @@
self._xpath = xpath.xmlXPathCtxtCompile(self._xpathCtxt, c_path)
if self._xpath is NULL:
self._raise_parse_error()
- self._absolute = self._checkAbsolutePath(c_path)
def __call__(self, _etree_or_element, **_variables):
cdef xpath.xmlXPathContext* xpathCtxt
@@ -240,9 +233,6 @@
document = _documentOrRaise(_etree_or_element)
element = _rootNodeOrRaise(_etree_or_element)
-
- if self._absolute and element is _etree_or_element:
- raise ValueError, "cannot use absolute path on element"
xpathCtxt = self._xpathCtxt
xpathCtxt.doc = document._c_doc
From scoder at codespeak.net Fri May 26 21:41:56 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Fri, 26 May 2006 21:41:56 +0200 (CEST)
Subject: [Lxml-checkins] r27742 - lxml/trunk
Message-ID: <20060526194156.E917010097@code0.codespeak.net>
Author: scoder
Date: Fri May 26 21:41:55 2006
New Revision: 27742
Modified:
lxml/trunk/CHANGES.txt
Log:
typo
Modified: lxml/trunk/CHANGES.txt
==============================================================================
--- lxml/trunk/CHANGES.txt (original)
+++ lxml/trunk/CHANGES.txt Fri May 26 21:41:55 2006
@@ -23,8 +23,8 @@
* Document reference in ElementTree objects was not updated when the root
element was moved to a different document
-* Running absolute XPath expressions on an Elements now correctly evaluates
- against the root tree
+* Running absolute XPath expressions on an Element now evaluates against the
+ root tree
* Evaluating absolute XPath expressions ('/*') on an ElementTree could fail
From scoder at codespeak.net Fri May 26 22:16:18 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Fri, 26 May 2006 22:16:18 +0200 (CEST)
Subject: [Lxml-checkins] r27746 - lxml/trunk/src/lxml
Message-ID: <20060526201618.C9AA91009A@code0.codespeak.net>
Author: scoder
Date: Fri May 26 22:16:17 2006
New Revision: 27746
Modified:
lxml/trunk/src/lxml/etree.pyx
Log:
make True/False a little more constant
Modified: lxml/trunk/src/lxml/etree.pyx
==============================================================================
--- lxml/trunk/src/lxml/etree.pyx (original)
+++ lxml/trunk/src/lxml/etree.pyx Fri May 26 22:16:17 2006
@@ -8,6 +8,12 @@
cimport cstd
import re
+import __builtin__
+cdef object True
+cdef object False
+True = __builtin__.True
+False = __builtin__.False
+
import _elementpath
from StringIO import StringIO
import sys
From scoder at codespeak.net Sat May 27 06:36:25 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Sat, 27 May 2006 06:36:25 +0200 (CEST)
Subject: [Lxml-checkins] r27748 - lxml/trunk
Message-ID: <20060527043625.7A343100AD@code0.codespeak.net>
Author: scoder
Date: Sat May 27 06:36:21 2006
New Revision: 27748
Modified:
lxml/trunk/CREDITS.txt
Log:
cleanup and updates in CREDITS.txt
Modified: lxml/trunk/CREDITS.txt
==============================================================================
--- lxml/trunk/CREDITS.txt (original)
+++ lxml/trunk/CREDITS.txt Sat May 27 06:36:21 2006
@@ -1,52 +1,58 @@
Credits
-------
-Martijn Faassen - initial and main developer
+Martijn Faassen - initial main developer
-Marc-Antoine Parent - XPath extension function help and patches
+Stefan Behnel - main developer and maintainer
-Stefan Behnel - core development work (SAX support, misc patches)
+Marc-Antoine Parent - XPath extension function help and patches
Olivier Grisel - improved (c)ElementTree compatibility patches,
website improvements.
+Kasimier Buchcik - help with specs and libxml2
+
Florian Wagner - help with copy.deepcopy support, bug reporting
Emil Kroymann - help with encoding support, bug reporting
Slou - help with index() support, bug reporting
-Duncan Booth - bugfixing
-
Paul Everitt - bug reporting, feedback on API design
-Julien Anguenot - bug reporting
-
Paul Clifford - Python 2.2 compatibility fixes
-Wade Leftwich - unicode bug reporting
+Victor Ng - Discussions on memory management strategies, vlibxml2
-Henrik Thostrup Jensen - bug reporting
+Robert Kern - feedback on API design
-dharana - bug reporting
+Trent Mick - setup.py patch
-Hamish Lawson - bug reporting
+Steve Howe - Windows builds
-Gavrie Philipson - bug reporting
+David Sankel - building statically on Windows
-Victor Ng - Discussions on memory management strategies, vlibxml2
+Duncan Booth - bugfixing
-Robert Kern - feedback on API design
+Dean Pavlekovic - bug reporting
+
+Julien Anguenot - bug reporting
+
+Wade Leftwich - unicode bug reporting
Kieran Holland - iteration crash bug report
-Trent Mick - setup.py patch
+Henrik Thostrup Jensen - bug reporting
-Steve Howe - Windows builds
+dharana - bug reporting
+
+Hamish Lawson - bug reporting
+
+Gavrie Philipson - bug reporting
-David Sankel - building statically on Windows
Thanks also to:
+---------------
* the libxml2 project for a great XML library.
From scoder at codespeak.net Sat May 27 12:11:41 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Sat, 27 May 2006 12:11:41 +0200 (CEST)
Subject: [Lxml-checkins] r27751 - lxml/trunk/doc
Message-ID: <20060527101141.96D8A100B3@code0.codespeak.net>
Author: scoder
Date: Sat May 27 12:11:38 2006
New Revision: 27751
Added:
lxml/trunk/doc/FAQ.txt
Log:
new FAQ.txt
Added: lxml/trunk/doc/FAQ.txt
==============================================================================
--- (empty file)
+++ lxml/trunk/doc/FAQ.txt Sat May 27 12:11:38 2006
@@ -0,0 +1,78 @@
+Frequently Asked Questions
+==========================
+
+See also the notes on compatibility_ to ElementTree_.
+
+.. _compatibility: compatibility.html
+.. _ElementTree: http://effbot.org/zone/element-index.htm
+
+
+1) Is there a tutorial?
+
+ There is a `tutorial for ElementTree`_ which also works for lxml.etree.
+ The `API documentation`_ also contains many examples.
+
+ .. _`tutorial for ElementTree`: http://effbot.org/zone/element.htm
+ .. _`API documentation`: api.html
+
+
+2) Where can I find more documentation about lxml?
+
+ There is a lot of documentation as lxml implements the well-known
+ `ElementTree API`_ and tries to follow its documentation as closely as
+ possible. There are a couple of issues where lxml cannot keep up
+ compatibility. They are described in the compatibility_ documentation.
+ The lxml specific extensions to the API are described by individual files
+ in the ``doc`` directory of the distribution and on `the web page`_.
+
+ .. _`ElementTree API`: http://effbot.org/zone/element-index.htm
+ .. _`the web page`: http://codespeak.net/lxml/#documentation
+
+
+3) Why are there ``findall()`` and ``xpath()`` methods on Element(Tree)?
+
+ ``findall()`` is specified in the `ElementTree API`_. It supports a
+ `simple subset of the XPath language`_, without predicates, conditions and
+ other advanced features. It is very handy for finding specific tags in a
+ tree. Another important difference is namespace handling, which uses the
+ ``{namespace}tagname`` notation. This is not supported by XPath. The
+ findall, find and findtext methods are compatible with other ElementTree
+ implementations and allow writing portable code that runs on ElementTree,
+ cElementTree and lxml.etree.
+
+ ``xpath()``, on the other hand, supports the complete power of the XPath
+ language, including predicates, XPath functions and Python extension
+ functions. The syntax is defined by the `XPath specification`_. If you
+ need the expressiveness and selectivity of XPath, the xpath method, the
+ ``XPath`` class and the ``XPathEvaluator`` are the best choice.
+
+ .. _`simple subset of the XPath language`: http://effbot.org/zone/element-xpath.htm
+ .. _`XPath specification`: http://www.w3.org/TR/xpath
+
+
+4) Why doesn't ``findall()`` support XPath expressions?
+
+ It was decided that it is more important to keep compatibility with
+ ElementTree_ to simplify code migration between the libraries. The main
+ difference compared to XPath is the ``{namespace}tagname`` notation used in
+ ``findall()``, which is not valid XPath.
+
+ ElementTree and lxml.etree use the same implementation, which assures 100%
+ compatibility. Note that ``findall()`` is `so fast`_ in lxml that a native
+ implementation would not bring any performance benefits.
+
+ .. _`so fast`: performance.html#tree-traversal
+
+
+5) Why is my application so slow?
+
+ lxml.etree is a very fast library for processing XML. There are, however,
+ `a few caveats`_ involved in the mapping of the powerful libxml2 library to
+ the simple and convenient ElementTree API. Not all operations are as fast
+ as the simplicity of the API might suggest. The `benchmark page`_ has a
+ comparison to other ElementTree implementations and a number of tips for
+ performance tweaking.
+
+ .. _`a few caveats`: performance.html#the-elementtree-api
+ .. _`benchmark page`: performance.html
+
From scoder at codespeak.net Sat May 27 12:18:28 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Sat, 27 May 2006 12:18:28 +0200 (CEST)
Subject: [Lxml-checkins] r27752 - lxml/trunk/doc
Message-ID: <20060527101828.D9C2F100B3@code0.codespeak.net>
Author: scoder
Date: Sat May 27 12:18:27 2006
New Revision: 27752
Modified:
lxml/trunk/doc/main.txt
Log:
link to FAQ from main doc page
Modified: lxml/trunk/doc/main.txt
==============================================================================
--- lxml/trunk/doc/main.txt (original)
+++ lxml/trunk/doc/main.txt Sat May 27 12:18:27 2006
@@ -4,14 +4,16 @@
Introduction
------------
-lxml is a Pythonic binding for the libxml2_ and libxslt_ libraries. See
-the introduction_ for more information about background and goals.
+lxml is a Pythonic binding for the libxml2_ and libxslt_ libraries. See the
+introduction_ for more information about background and goals. Some common
+questions are answered in the FAQ_.
.. _libxml2: http://xmlsoft.org
-
.. _libxslt: http://xmlsoft.org/XSLT
.. _introduction: intro.html
+.. _FAQ: FAQ.html
+
Download
--------
@@ -72,6 +74,7 @@
.. _`browse it through the web`: http://codespeak.net/svn/lxml
.. _`latest CHANGES`: http://codespeak.net/svn/lxml/trunk/CHANGES.txt
+
Documentation
-------------
@@ -108,6 +111,7 @@
.. _`XSLT`: http://www.w3.org/TR/xslt
.. _`c14n`: http://www.w3.org/TR/xml-c14n
+
Mailing list
------------
@@ -115,6 +119,7 @@
.. _`mailing list`: http://codespeak.net/mailman/listinfo/lxml-dev
+
License
-------
From scoder at codespeak.net Sat May 27 12:25:00 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Sat, 27 May 2006 12:25:00 +0200 (CEST)
Subject: [Lxml-checkins] r27753 - lxml/trunk/doc
Message-ID: <20060527102500.4E0B7100B3@code0.codespeak.net>
Author: scoder
Date: Sat May 27 12:24:58 2006
New Revision: 27753
Modified:
lxml/trunk/doc/FAQ.txt
Log:
small updates in FAQ.txt
Modified: lxml/trunk/doc/FAQ.txt
==============================================================================
--- lxml/trunk/doc/FAQ.txt (original)
+++ lxml/trunk/doc/FAQ.txt Sat May 27 12:24:58 2006
@@ -29,7 +29,7 @@
.. _`the web page`: http://codespeak.net/lxml/#documentation
-3) Why are there ``findall()`` and ``xpath()`` methods on Element(Tree)?
+3) What are the ``findall()`` and ``xpath()`` methods on Element(Tree)?
``findall()`` is specified in the `ElementTree API`_. It supports a
`simple subset of the XPath language`_, without predicates, conditions and
@@ -43,14 +43,15 @@
``xpath()``, on the other hand, supports the complete power of the XPath
language, including predicates, XPath functions and Python extension
functions. The syntax is defined by the `XPath specification`_. If you
- need the expressiveness and selectivity of XPath, the xpath method, the
- ``XPath`` class and the ``XPathEvaluator`` are the best choice.
+ need the expressiveness and selectivity of XPath, the ``xpath()`` method,
+ the ``XPath`` class and the ``XPathEvaluator`` are the best choice_.
.. _`simple subset of the XPath language`: http://effbot.org/zone/element-xpath.htm
.. _`XPath specification`: http://www.w3.org/TR/xpath
+ .. _choice: performance.html#xpath
-4) Why doesn't ``findall()`` support XPath expressions?
+4) Why doesn't ``findall()`` support full XPath expressions?
It was decided that it is more important to keep compatibility with
ElementTree_ to simplify code migration between the libraries. The main
From scoder at codespeak.net Sat May 27 12:26:13 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Sat, 27 May 2006 12:26:13 +0200 (CEST)
Subject: [Lxml-checkins] r27754 - lxml/trunk/doc
Message-ID: <20060527102613.64A37100B3@code0.codespeak.net>
Author: scoder
Date: Sat May 27 12:26:12 2006
New Revision: 27754
Modified:
lxml/trunk/doc/FAQ.txt
Log:
small updates in FAQ.txt
Modified: lxml/trunk/doc/FAQ.txt
==============================================================================
--- lxml/trunk/doc/FAQ.txt (original)
+++ lxml/trunk/doc/FAQ.txt Sat May 27 12:26:12 2006
@@ -31,7 +31,7 @@
3) What are the ``findall()`` and ``xpath()`` methods on Element(Tree)?
- ``findall()`` is specified in the `ElementTree API`_. It supports a
+ ``findall()`` is part of the original `ElementTree API`_. It supports a
`simple subset of the XPath language`_, without predicates, conditions and
other advanced features. It is very handy for finding specific tags in a
tree. Another important difference is namespace handling, which uses the
From scoder at codespeak.net Sat May 27 12:27:50 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Sat, 27 May 2006 12:27:50 +0200 (CEST)
Subject: [Lxml-checkins] r27755 - lxml/trunk/doc
Message-ID: <20060527102750.22A8A100B3@code0.codespeak.net>
Author: scoder
Date: Sat May 27 12:27:48 2006
New Revision: 27755
Modified:
lxml/trunk/doc/performance.txt
Log:
typo
Modified: lxml/trunk/doc/performance.txt
==============================================================================
--- lxml/trunk/doc/performance.txt (original)
+++ lxml/trunk/doc/performance.txt Sat May 27 12:27:48 2006
@@ -131,7 +131,7 @@
cET: append_from_document (-- T3,T4) 0.0227 msec/pass
ET : append_from_document (-- T3,T4) 0.1563 msec/pass
-Although this are fairly small numbers compared to parsing, this easily shows
+Although these are fairly small numbers compared to parsing, this easily shows
the different performance classes for lxml and (c)ET. Where the latter do not
have to care about parent pointers and tree structures, lxml has to deep
traverse the appended tree. The performance difference therefore increases
From scoder at codespeak.net Sat May 27 12:30:55 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Sat, 27 May 2006 12:30:55 +0200 (CEST)
Subject: [Lxml-checkins] r27756 - lxml/trunk/doc
Message-ID: <20060527103055.2F59D100B3@code0.codespeak.net>
Author: scoder
Date: Sat May 27 12:30:53 2006
New Revision: 27756
Modified:
lxml/trunk/doc/performance.txt
Log:
state Python version in performance.txt
Modified: lxml/trunk/doc/performance.txt
==============================================================================
--- lxml/trunk/doc/performance.txt (original)
+++ lxml/trunk/doc/performance.txt Sat May 27 12:30:53 2006
@@ -12,8 +12,8 @@
.. _cElementTree: http://effbot.org/zone/celementtree.htm
The statements made here are backed by the benchmark script `bench.py`_ that
-comes with the lxml source distribution. The numbers that are cited below
-compare lxml 1.0, ElementTree 1.2.6 and cElementTree 1.0.5.
+comes with the lxml source distribution. The timings cited below compare lxml
+1.0, ElementTree 1.2.6 and cElementTree 1.0.5 under CPython 2.4.2.
.. _`bench.py`: http://codespeak.net/svn/lxml/trunk/bench.py
From scoder at codespeak.net Sat May 27 17:59:42 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Sat, 27 May 2006 17:59:42 +0200 (CEST)
Subject: [Lxml-checkins] r27769 - in lxml/trunk: doc src/lxml/tests
Message-ID: <20060527155942.AD84C100A0@code0.codespeak.net>
Author: scoder
Date: Sat May 27 17:59:40 2006
New Revision: 27769
Modified:
lxml/trunk/doc/FAQ.txt
lxml/trunk/src/lxml/tests/test_xslt.py
Log:
test case and FAQ entry for difference of str(xslt(doc)) and xslt(doc).write()
Modified: lxml/trunk/doc/FAQ.txt
==============================================================================
--- lxml/trunk/doc/FAQ.txt (original)
+++ lxml/trunk/doc/FAQ.txt Sat May 27 17:59:40 2006
@@ -65,7 +65,23 @@
.. _`so fast`: performance.html#tree-traversal
-5) Why is my application so slow?
+5) What is the difference between str(xslt(doc)) and xslt(doc).write() ?
+
+ The str() implementation knows about the output method chosen in the
+ stylesheet (xsl:output), write() doesn't. If you call write(), the result
+ will be a normal XML tree serialization in the requested encoding. Calling
+ this method may also fail for XSLT results that are not XML trees
+ (e.g. string results).
+
+ If you call str(), it will return the serialized result as specified by the
+ XSL transform. This correctly serializes string results to encoded Python
+ strings and honours ``xsl:output`` options like ``indent``. This almost
+ certainly does what you want, so you should only use ``write()`` if you are
+ sure that the XSLT result is an XML tree and you want to override the
+ encoding and indentation options requested by the stylesheet.
+
+
+6) Why is my application so slow?
lxml.etree is a very fast library for processing XML. There are, however,
`a few caveats`_ involved in the mapping of the powerful libxml2 library to
Modified: lxml/trunk/src/lxml/tests/test_xslt.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_xslt.py (original)
+++ lxml/trunk/src/lxml/tests/test_xslt.py Sat May 27 17:59:40 2006
@@ -6,7 +6,7 @@
import unittest, doctest
-from common_imports import etree, HelperTestCase, fileInTestDir
+from common_imports import etree, StringIO, HelperTestCase, fileInTestDir
class ETreeXSLTTestCase(HelperTestCase):
"""XPath tests etree"""
@@ -73,6 +73,29 @@
self.assertEquals(expected,
unicode(str(res), 'UTF-16'))
+ def test_xslt_encoding_override(self):
+ tree = self.parse(u'\uF8D2 \uF8D2 ')
+ style = self.parse('''\
+
+
+
+
+
+ ''')
+
+ st = etree.XSLT(style)
+ res = st.apply(tree)
+ expected = u"""\
+
+\uF8D2 """
+
+ f = StringIO()
+ res.write(f, 'UTF-16')
+ result = unicode(f.getvalue(), 'UTF-16')
+ self.assertEquals(expected,
+ result)
+
def test_xslt_unicode(self):
tree = self.parse(u'\uF8D2 \uF8D2 ')
style = self.parse('''\
From scoder at codespeak.net Sun May 28 06:48:56 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Sun, 28 May 2006 06:48:56 +0200 (CEST)
Subject: [Lxml-checkins] r27779 - lxml/trunk/doc
Message-ID: <20060528044856.AB213100A8@code0.codespeak.net>
Author: scoder
Date: Sun May 28 06:48:53 2006
New Revision: 27779
Modified:
lxml/trunk/doc/FAQ.txt
Log:
clarification on XSLTResultTree
Modified: lxml/trunk/doc/FAQ.txt
==============================================================================
--- lxml/trunk/doc/FAQ.txt (original)
+++ lxml/trunk/doc/FAQ.txt Sun May 28 06:48:53 2006
@@ -67,11 +67,12 @@
5) What is the difference between str(xslt(doc)) and xslt(doc).write() ?
- The str() implementation knows about the output method chosen in the
- stylesheet (xsl:output), write() doesn't. If you call write(), the result
- will be a normal XML tree serialization in the requested encoding. Calling
- this method may also fail for XSLT results that are not XML trees
- (e.g. string results).
+ The str() implementation of the XSLTResultTree class (a subclass of
+ ElementTree) knows about the output method chosen in the stylesheet
+ (xsl:output), write() doesn't. If you call write(), the result will be a
+ normal XML tree serialization in the requested encoding. Calling this
+ method may also fail for XSLT results that are not XML trees (e.g. string
+ results).
If you call str(), it will return the serialized result as specified by the
XSL transform. This correctly serializes string results to encoded Python
From scoder at codespeak.net Sun May 28 07:20:16 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Sun, 28 May 2006 07:20:16 +0200 (CEST)
Subject: [Lxml-checkins] r27780 - lxml/trunk/doc
Message-ID: <20060528052016.3ACFF100AB@code0.codespeak.net>
Author: scoder
Date: Sun May 28 07:20:14 2006
New Revision: 27780
Modified:
lxml/trunk/doc/FAQ.txt
Log:
FAQ entry on crashes and bug reporting
Modified: lxml/trunk/doc/FAQ.txt
==============================================================================
--- lxml/trunk/doc/FAQ.txt (original)
+++ lxml/trunk/doc/FAQ.txt Sun May 28 07:20:14 2006
@@ -7,7 +7,7 @@
.. _ElementTree: http://effbot.org/zone/element-index.htm
-1) Is there a tutorial?
+#) Is there a tutorial?
There is a `tutorial for ElementTree`_ which also works for lxml.etree.
The `API documentation`_ also contains many examples.
@@ -16,7 +16,7 @@
.. _`API documentation`: api.html
-2) Where can I find more documentation about lxml?
+#) Where can I find more documentation about lxml?
There is a lot of documentation as lxml implements the well-known
`ElementTree API`_ and tries to follow its documentation as closely as
@@ -29,7 +29,39 @@
.. _`the web page`: http://codespeak.net/lxml/#documentation
-3) What are the ``findall()`` and ``xpath()`` methods on Element(Tree)?
+#) My application crashes! Why does lxml.etree do that?
+
+ a) If you are using threads, make sure that you are not sharing non
+ thread-safe objects between threads. Especially the default parser,
+ XSLT() and the validators are not thread-safe for performance reasons.
+ You have to create a new one for each thread, use a thread-safe object
+ pool or ensure thread-safe access to them yourself.
+
+ b) One of the goals of lxml is "no segfaults", so if there is no clear
+ warning in the documentation that you were doing something potentially
+ harmful, you have found a bug and we would like to hear about it.
+ Please report this bug to the mailing list. See the next section on how
+ to do that.
+
+
+#) I think I have found a bug in lxml. What should I do?
+
+ a) First, you should look at the `current developer changelog`_ to see if
+ this is a known problem that has already been fixed in the SVN trunk.
+
+ .. _`current developer changelog`: http://codespeak.net/svn/lxml/trunk/CHANGES.txt
+
+ b) Otherwise, we would really like to hear about it. Please report it to
+ the `mailing list`_ so that we can fix it. It is very helpful in this
+ case if you can come up with a short code snippet that demonstrates your
+ problem. Please also report the version of lxml, libxml2 and libxslt
+ that you are using (see the module attributes ``etree.LXML_VERSION``
+ etc.).
+
+ .. _`mailing list`: http://codespeak.net/mailman/listinfo/lxml-dev
+
+
+#) What are the ``findall()`` and ``xpath()`` methods on Element(Tree)?
``findall()`` is part of the original `ElementTree API`_. It supports a
`simple subset of the XPath language`_, without predicates, conditions and
@@ -51,7 +83,7 @@
.. _choice: performance.html#xpath
-4) Why doesn't ``findall()`` support full XPath expressions?
+#) Why doesn't ``findall()`` support full XPath expressions?
It was decided that it is more important to keep compatibility with
ElementTree_ to simplify code migration between the libraries. The main
@@ -65,7 +97,7 @@
.. _`so fast`: performance.html#tree-traversal
-5) What is the difference between str(xslt(doc)) and xslt(doc).write() ?
+#) What is the difference between str(xslt(doc)) and xslt(doc).write() ?
The str() implementation of the XSLTResultTree class (a subclass of
ElementTree) knows about the output method chosen in the stylesheet
@@ -82,7 +114,7 @@
encoding and indentation options requested by the stylesheet.
-6) Why is my application so slow?
+#) Why is my application so slow?
lxml.etree is a very fast library for processing XML. There are, however,
`a few caveats`_ involved in the mapping of the powerful libxml2 library to
From scoder at codespeak.net Sun May 28 09:32:13 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Sun, 28 May 2006 09:32:13 +0200 (CEST)
Subject: [Lxml-checkins] r27781 - lxml/trunk/doc
Message-ID: <20060528073213.5CDBA100AC@code0.codespeak.net>
Author: scoder
Date: Sun May 28 09:32:12 2006
New Revision: 27781
Modified:
lxml/trunk/doc/api.txt
Log:
cleaner test case
Modified: lxml/trunk/doc/api.txt
==============================================================================
--- lxml/trunk/doc/api.txt (original)
+++ lxml/trunk/doc/api.txt Sun May 28 09:32:12 2006
@@ -79,8 +79,8 @@
>>> xml_header = ''
>>> xhtml = xml_header + doctype_string + ''
- >>> et = etree.parse(StringIO(xhtml))
- >>> docinfo = et.docinfo
+ >>> tree = etree.parse(StringIO(xhtml))
+ >>> docinfo = tree.docinfo
>>> print docinfo.public_id
-//W3C//DTD XHTML 1.0 Transitional//EN
>>> print docinfo.system_url
From scoder at codespeak.net Sun May 28 09:32:58 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Sun, 28 May 2006 09:32:58 +0200 (CEST)
Subject: [Lxml-checkins] r27782 - lxml/trunk/src/lxml/tests
Message-ID: <20060528073258.1BCD9100AD@code0.codespeak.net>
Author: scoder
Date: Sun May 28 09:32:55 2006
New Revision: 27782
Modified:
lxml/trunk/src/lxml/tests/test_elementtree.py
Log:
new parser test case that fails in libxml2 <= 2.6.22
Modified: lxml/trunk/src/lxml/tests/test_elementtree.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_elementtree.py (original)
+++ lxml/trunk/src/lxml/tests/test_elementtree.py Sun May 28 09:32:55 2006
@@ -1699,7 +1699,14 @@
' ',
tree.getroot()
)
-
+
+ def test_parse_with_encoding(self):
+ # this can fail in libxml2 <= 2.6.22
+ parse = self.etree.parse
+ tree = parse(StringIO(' '))
+ self.assertXML('',
+ tree.getroot())
+
def test_encoding(self):
Element = self.etree.Element
From scoder at codespeak.net Sun May 28 10:27:59 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Sun, 28 May 2006 10:27:59 +0200 (CEST)
Subject: [Lxml-checkins] r27783 - in lxml/trunk: . src/lxml
Message-ID: <20060528082759.CF840100B5@code0.codespeak.net>
Author: scoder
Date: Sun May 28 10:27:57 2006
New Revision: 27783
Modified:
lxml/trunk/CHANGES.txt
lxml/trunk/src/lxml/parser.pxi
Log:
fix parsing strings with encoding declaration under libxml2 <= 2.6.22
Modified: lxml/trunk/CHANGES.txt
==============================================================================
--- lxml/trunk/CHANGES.txt (original)
+++ lxml/trunk/CHANGES.txt Sun May 28 10:27:57 2006
@@ -20,6 +20,9 @@
Bugs fixed
----------
+* On libxml2 <= 2.6.22, parsing strings with encoding declaration could fail
+ in certain cases
+
* Document reference in ElementTree objects was not updated when the root
element was moved to a different document
Modified: lxml/trunk/src/lxml/parser.pxi
==============================================================================
--- lxml/trunk/src/lxml/parser.pxi (original)
+++ lxml/trunk/src/lxml/parser.pxi Sun May 28 10:27:57 2006
@@ -295,12 +295,11 @@
cdef Py_ssize_t py_buffer_len
cdef int buffer_len
cdef char* c_text
- cdef char* c_encoding
- cdef int enc
py_buffer_len = python.PyUnicode_GET_DATA_SIZE(utext)
if py_buffer_len > python.INT_MAX:
text_utf = _utf8(utext)
- return self._parseDoc(text_utf, c_filename)
+ py_buffer_len = python.PyString_GET_SIZE(text_utf)
+ return self._parseDoc(_cstr(text_utf), py_buffer_len, c_filename)
buffer_len = py_buffer_len
self._error_log.connect()
@@ -321,22 +320,26 @@
recover = self._parse_options & xmlparser.XML_PARSE_RECOVER
return _handleParseResult(pctxt, result, NULL, recover)
- cdef xmlDoc* _parseDoc(self, char* c_text, char* c_filename) except NULL:
+ cdef xmlDoc* _parseDoc(self, char* c_text, Py_ssize_t c_len,
+ char* c_filename) except NULL:
"""Parse document, share dictionary if possible.
"""
cdef xmlDoc* result
cdef xmlParserCtxt* pctxt
cdef int recover
+ if c_len > python.INT_MAX:
+ raise ParserError, "string is too long to parse it with libxml2"
+
self._error_log.connect()
pctxt = self._parser_ctxt
__GLOBAL_PARSER_CONTEXT._initParserDict(pctxt)
if self._parser_type == LXML_HTML_PARSER:
- result = htmlparser.htmlCtxtReadDoc(
- pctxt, c_text, c_filename, NULL, self._parse_options)
+ result = htmlparser.htmlCtxtReadMemory(
+ pctxt, c_text, c_len, c_filename, NULL, self._parse_options)
else:
- result = xmlparser.xmlCtxtReadDoc(
- pctxt, c_text, c_filename, NULL, self._parse_options)
+ result = xmlparser.xmlCtxtReadMemory(
+ pctxt, c_text, c_len, c_filename, NULL, self._parse_options)
self._error_log.disconnect()
recover = self._parse_options & xmlparser.XML_PARSE_RECOVER
@@ -589,6 +592,8 @@
cdef xmlDoc* _parseDoc(text, filename, _BaseParser parser) except NULL:
cdef char* c_filename
+ cdef char* c_text
+ cdef Py_ssize_t c_len
if parser is None:
parser = __DEFAULT_PARSER
__GLOBAL_PARSER_CONTEXT._initParser()
@@ -599,7 +604,9 @@
if python.PyUnicode_Check(text):
return (<_BaseParser>parser)._parseUnicodeDoc(text, c_filename)
else:
- return (<_BaseParser>parser)._parseDoc(_cstr(text), c_filename)
+ c_text = _cstr(text)
+ c_len = python.PyString_GET_SIZE(text)
+ return (<_BaseParser>parser)._parseDoc(c_text, c_len, c_filename)
cdef xmlDoc* _parseDocFromFile(filename, _BaseParser parser) except NULL:
if parser is None:
From scoder at codespeak.net Sun May 28 19:53:31 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Sun, 28 May 2006 19:53:31 +0200 (CEST)
Subject: [Lxml-checkins] r27795 - in lxml/trunk: . doc src/lxml
Message-ID: <20060528175331.69BD310036@code0.codespeak.net>
Author: scoder
Date: Sun May 28 19:53:28 2006
New Revision: 27795
Modified:
lxml/trunk/CHANGES.txt
lxml/trunk/doc/api.txt
lxml/trunk/src/lxml/xmlerror.pxi
Log:
make logging to Python stdlib logging package work
Modified: lxml/trunk/CHANGES.txt
==============================================================================
--- lxml/trunk/CHANGES.txt (original)
+++ lxml/trunk/CHANGES.txt Sun May 28 19:53:28 2006
@@ -7,6 +7,8 @@
Features added
--------------
+* PyErrorLog for error logging through the Python ``logging`` module
+
* ``element.getroottree()`` returns an ElementTree for the root node of the
document that contains the element.
Modified: lxml/trunk/doc/api.txt
==============================================================================
--- lxml/trunk/doc/api.txt (original)
+++ lxml/trunk/doc/api.txt Sun May 28 19:53:28 2006
@@ -123,12 +123,20 @@
PARSER ERR_TAG_NOT_FINISHED
There is also a convenience attribute ``last_error`` that returns the last
-error::
+error or fatal error that occurred::
>>> entry = e.error_log.last_error
>>> print entry.domain_name, entry.type_name, entry.filename
PARSER ERR_TAG_NOT_FINISHED
+Alternatively, lxml.etree supports logging libxml2 messages to the Python
+stdlib logging module. This is done through the ``etree.PyErrorLog`` class.
+It disables the error reporting from exceptions and forwards log messages to a
+Python logger. To use it, see the descriptions of the function
+``etree.useGlobalPythonLog`` and the class ``etree.PyErrorLog`` for help.
+Note that this does not affect the local error logs of XSLT, XMLSchema,
+etc. which are described in their respective sections below.
+
Python unicode strings
----------------------
@@ -462,7 +470,7 @@
it for relevant messages::
>>> log = relaxng.error_log
- >>> print log.filter_from_errors()
+ >>> print log.last_error
:1:ERROR:RELAXNGV:ERR_LT_IN_ATTRIBUTE: Did not expect element c there
You can see that the error (ERROR) happened during RelaxNG validation
@@ -541,13 +549,15 @@
Error reporting works like for the RelaxNG class::
>>> log = xmlschema.error_log
- >>> errors = log.filter_from_errors()
- >>> print errors[0].domain_name
+ >>> error = log.last_error
+ >>> print error.domain_name
SCHEMASV
- >>> print errors[0].type_name
+ >>> print error.type_name
SCHEMAV_ELEMENT_CONTENT
-If you were to print this log entry, you would get something like the following::
+If you were to print this log entry, you would get something like the
+following. Note that the error message depends on the libxml2 version in
+use::
:1:ERROR::SCHEMAV_ELEMENT_CONTENT: Element 'c': This element is not expected. Expected is ( b ).
Modified: lxml/trunk/src/lxml/xmlerror.pxi
==============================================================================
--- lxml/trunk/src/lxml/xmlerror.pxi (original)
+++ lxml/trunk/src/lxml/xmlerror.pxi Sun May 28 19:53:28 2006
@@ -30,7 +30,7 @@
cdef readonly object level
cdef readonly object message
cdef readonly object filename
- cdef _set(self, xmlerror.xmlError* error):
+ cdef _setError(self, xmlerror.xmlError* error):
self.domain = error.domain
self.type = error.code
self.level = error.level
@@ -52,14 +52,9 @@
self.filename = filename
def __repr__(self):
- if self.filename:
- return "%s:%d:%s:%s:%s: %s" % (
- self.filename, self.line, self.level_name,
- self.domain_name, self.type_name, self.message)
- else:
- return "[]:%s:%s:%s: %s" % (
- self.level_name, self.domain_name,
- self.type_name, self.message)
+ return "%s:%d:%s:%s:%s: %s" % (
+ self.filename, self.line, self.level_name,
+ self.domain_name, self.type_name, self.message)
property domain_name:
def __get__(self):
@@ -74,15 +69,55 @@
return ErrorLevels._names[self.level]
cdef class _BaseErrorLog:
- "Immutable base version of an error log."
- cdef object _entries
cdef readonly object last_error
+ def __init__(self, last_error=None):
+ self.last_error = last_error
+
+ def copy(self):
+ return _BaseErrorLog(self.last_error)
+
+ def __repr__(self):
+ return ''
+
+ cdef void _receive(self, xmlerror.xmlError* error):
+ cdef int is_error
+ cdef _LogEntry entry
+ entry = _LogEntry()
+ entry._setError(error)
+ is_error = error.level == xmlerror.XML_ERR_ERROR or \
+ error.level == xmlerror.XML_ERR_FATAL
+ if __GLOBAL_ERROR_LOG is not self:
+ __GLOBAL_ERROR_LOG.receive(entry)
+ if is_error:
+ __GLOBAL_ERROR_LOG.last_error = entry
+ self.receive(entry)
+ if is_error:
+ self.last_error = entry
+
+ cdef void _receiveGeneric(self, int domain, int type, int level, int line,
+ message, filename):
+ cdef _LogEntry entry
+ entry = _LogEntry()
+ entry._setGeneric(domain, type, level, line, message, filename)
+ is_error = level == xmlerror.XML_ERR_ERROR or \
+ level == xmlerror.XML_ERR_FATAL
+ if __GLOBAL_ERROR_LOG is not self:
+ __GLOBAL_ERROR_LOG.receive(entry)
+ if is_error:
+ __GLOBAL_ERROR_LOG.last_error = entry
+ self.receive(entry)
+ if is_error:
+ self.last_error = entry
+
+cdef class _ListErrorLog(_BaseErrorLog):
+ "Immutable base version of a list based error log."
+ cdef object _entries
def __init__(self, entries, last_error=None):
+ _BaseErrorLog.__init__(self, last_error)
self._entries = entries
- self.last_error = last_error
def copy(self):
- return _BaseErrorLog(self._entries, self.last_error)
+ return _ListErrorLog(self._entries, self.last_error)
def __iter__(self):
return iter(self._entries)
@@ -104,7 +139,7 @@
for entry in self._entries:
if entry.domain in domains:
python.PyList_Append(filtered, entry)
- return _BaseErrorLog(filtered)
+ return _ListErrorLog(filtered)
def filter_types(self, types):
cdef _LogEntry entry
@@ -114,7 +149,7 @@
for entry in self._entries:
if entry.type in types:
python.PyList_Append(filtered, entry)
- return _BaseErrorLog(filtered)
+ return _ListErrorLog(filtered)
def filter_levels(self, levels):
"""Return a log with all messages of the requested level(s). Takes a
@@ -126,7 +161,7 @@
for entry in self._entries:
if entry.level in levels:
python.PyList_Append(filtered, entry)
- return _BaseErrorLog(filtered)
+ return _ListErrorLog(filtered)
def filter_from_level(self, level):
"Return a log with all messages of the requested level or worse."
@@ -135,7 +170,7 @@
for entry in self._entries:
if entry.level >= level:
python.PyList_Append(filtered, entry)
- return _BaseErrorLog(filtered)
+ return _ListErrorLog(filtered)
def filter_from_fatals(self):
"Convenience method to get all fatal error messages."
@@ -149,7 +184,10 @@
"Convenience method to get all warnings or worse."
return self.filter_from_level(ErrorLevels.WARNING)
-cdef class _ExtensibleErrorLog(_BaseErrorLog):
+cdef class _ErrorLog(_ListErrorLog):
+ def __init__(self):
+ _ListErrorLog.__init__(self, [])
+
cdef void connect(self):
del self._entries[:]
xmlerror.xmlSetStructuredErrorFunc(self, _receiveError)
@@ -157,45 +195,11 @@
cdef void disconnect(self):
xmlerror.xmlSetStructuredErrorFunc(NULL, _receiveError)
- cdef void _receive(self, xmlerror.xmlError* error):
- cdef int is_error
- cdef _LogEntry entry
- entry = _LogEntry()
- entry._set(error)
- is_error = error.level == xmlerror.XML_ERR_ERROR or \
- error.level == xmlerror.XML_ERR_FATAL
- if __GLOBAL_ERROR_LOG is not self:
- __GLOBAL_ERROR_LOG.receive(entry)
- if is_error:
- __GLOBAL_ERROR_LOG.last_error = entry
- self.receive(entry)
- if is_error:
- self.last_error = entry
-
- cdef void _receiveGeneric(self, int domain, int type, int level, int line,
- message, filename):
- cdef _LogEntry entry
- entry = _LogEntry()
- entry._setGeneric(domain, type, level, line, message, filename)
- is_error = level == xmlerror.XML_ERR_ERROR or \
- level == xmlerror.XML_ERR_FATAL
- if __GLOBAL_ERROR_LOG is not self:
- __GLOBAL_ERROR_LOG.receive(entry)
- if is_error:
- __GLOBAL_ERROR_LOG.last_error = entry
- self.receive(entry)
- if is_error:
- self.last_error = entry
-
-cdef class _ErrorLog(_ExtensibleErrorLog):
- def __init__(self):
- _ExtensibleErrorLog.__init__(self, [])
-
def clear(self):
del self._entries[:]
def copy(self):
- return _BaseErrorLog(self._entries[:], self.last_error)
+ return _ListErrorLog(self._entries[:], self.last_error)
def __iter__(self):
return iter(self._entries[:])
@@ -224,44 +228,72 @@
del entries[0]
python.PyList_Append(entries, entry)
-cdef class PyErrorLog(_ExtensibleErrorLog):
+cdef class PyErrorLog(_BaseErrorLog):
+ """A global error log that connects to the Python stdlib logging package.
+
+ The constructor accepts an optional logger name.
+
+ If you want to change the mapping between libxml2's ErrorLevels and Python
+ logging levels, you can modify the level_map dictionary from a subclass.
+
+ The default mapping is::
+
+ ErrorLevels.WARNING = logging.WARNING
+ ErrorLevels.ERROR = logging.ERROR
+ ErrorLevels.FATAL = logging.CRITICAL
+
+ You can also override the method ``receive()`` that takes a LogEntry
+ object and calls ``self.log(log_entry, format_string, arg1, arg2, ...)``
+ with appropriate data.
+ """
+ cdef public object level_map
cdef object _log
- cdef object _level_map
- cdef object _varsOf
def __init__(self, logger_name=None):
- _ExtensibleErrorLog.__init__(self, [])
+ _BaseErrorLog.__init__(self)
import logging
- self._level_map = {
+ self.level_map = {
ErrorLevels.WARNING : logging.WARNING,
ErrorLevels.ERROR : logging.ERROR,
ErrorLevels.FATAL : logging.CRITICAL
}
- self._varsOf = vars
if logger_name:
- logger = logging.getLogger(name)
+ logger = logging.getLogger(logger_name)
else:
logger = logging.getLogger()
self._log = logger.log
def copy(self):
- return self
+ return _ListErrorLog([])
- def receive(self, entry):
- py_level = self._level_map[entry.level]
+ def log(self, entry, message_format_string, *args):
self._log(
- py_level,
- "%(asctime)s %(levelname)s %(domain_name)s %(message)s",
- self._varsOf(entry)
+ self.level_map.get(entry.level, 0),
+ message_format_string, *args
)
-# global list to collect error output messages from libxml2/libxslt
-cdef _RotatingErrorLog __GLOBAL_ERROR_LOG
+ def receive(self, entry):
+ self.log(entry, entry)
+
+# global list log to collect error output messages from libxml2/libxslt
+cdef _BaseErrorLog __GLOBAL_ERROR_LOG
__GLOBAL_ERROR_LOG = _RotatingErrorLog(__MAX_LOG_SIZE)
-def __copyGlobalErrorLog():
+cdef __copyGlobalErrorLog():
"Helper function for properties in exceptions."
return __GLOBAL_ERROR_LOG.copy()
+def useGlobalPythonLog(PyErrorLog log not None):
+ """Replace the global error log by an etree.PyErrorLog that uses the
+ standard Python logging package.
+
+ Note that this slows down processing and disables access to the global
+ error log from exceptions. Parsers, XSLT etc. will continue to provide
+ their normal local error log.
+ """
+ global __GLOBAL_ERROR_LOG
+ __GLOBAL_ERROR_LOG = log
+
+
# local log function: forward error to logger object
cdef void _receiveError(void* c_log_handler, xmlerror.xmlError* error):
cdef _ErrorLog log_handler
From scoder at codespeak.net Sun May 28 19:55:08 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Sun, 28 May 2006 19:55:08 +0200 (CEST)
Subject: [Lxml-checkins] r27796 - lxml/trunk/src/lxml
Message-ID: <20060528175508.759AF10036@code0.codespeak.net>
Author: scoder
Date: Sun May 28 19:55:07 2006
New Revision: 27796
Modified:
lxml/trunk/src/lxml/etree.pyx
Log:
cleanup: reuse Pyrex initialized None values
Modified: lxml/trunk/src/lxml/etree.pyx
==============================================================================
--- lxml/trunk/src/lxml/etree.pyx (original)
+++ lxml/trunk/src/lxml/etree.pyx Sun May 28 19:55:07 2006
@@ -121,9 +121,6 @@
# class for temporarily storing exceptions raised in extensions
cdef class _ExceptionContext:
cdef object _exc_info
- def __init__(self):
- self._exc_info = None
-
cdef void clear(self):
self._exc_info = None
@@ -1207,9 +1204,7 @@
else:
c_node = _findChildForwards(node._c_node, 0)
self._next_element = _nextElement
- if c_node is NULL:
- self._node = None
- else:
+ if c_node is not NULL:
self._node = _elementFactory(node._doc, c_node)
def __iter__(self):
return self
From scoder at codespeak.net Sun May 28 20:15:38 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Sun, 28 May 2006 20:15:38 +0200 (CEST)
Subject: [Lxml-checkins] r27799 - lxml/trunk/doc
Message-ID: <20060528181538.EE04C1006B@code0.codespeak.net>
Author: scoder
Date: Sun May 28 20:15:32 2006
New Revision: 27799
Modified:
lxml/trunk/doc/api.txt
Log:
whitespace
Modified: lxml/trunk/doc/api.txt
==============================================================================
--- lxml/trunk/doc/api.txt (original)
+++ lxml/trunk/doc/api.txt Sun May 28 20:15:32 2006
@@ -304,6 +304,7 @@
>>> tree.xpath(tree.getpath(d2)) == [d2]
True
+
XSLT
----
From scoder at codespeak.net Sun May 28 20:16:09 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Sun, 28 May 2006 20:16:09 +0200 (CEST)
Subject: [Lxml-checkins] r27800 - lxml/trunk/doc
Message-ID: <20060528181609.7F7A21006B@code0.codespeak.net>
Author: scoder
Date: Sun May 28 20:16:07 2006
New Revision: 27800
Modified:
lxml/trunk/doc/FAQ.txt
Log:
FAQ entry on threading
Modified: lxml/trunk/doc/FAQ.txt
==============================================================================
--- lxml/trunk/doc/FAQ.txt (original)
+++ lxml/trunk/doc/FAQ.txt Sun May 28 20:16:07 2006
@@ -61,6 +61,26 @@
.. _`mailing list`: http://codespeak.net/mailman/listinfo/lxml-dev
+#) Can I use threads to concurrently access the lxml API?
+
+ You should be able to use lxml in a multi-threaded environment, although
+ this is not very well tested. Note that lxml does not provide any
+ thread-safety by itself (mainly for performance reasons), so you have to
+ take care when you use parts of the API concurrently. Most importantly,
+ you must not forget to call ``etree.initThread()`` from each newly
+ generated thread to initialize lxml and libxml2 for the new thread context.
+ If you call API functions from a thread without having called this function
+ first, lxml can easily crash your program.
+
+ Basically none of the API classes is thread-safe, including parsers, XPath,
+ XSLT and the validators. You cannot use such an object concurrently.
+ However, it is perfectly viable to create independent instances for each
+ thread. This is a cheap thing to do for parsers, but more expensive for
+ XSLT and validators, which have to compile trees recursively. So you might
+ want to consider a thread pool approach or threaded processing chains to
+ reduce the overhead if you require threading here.
+
+
#) What are the ``findall()`` and ``xpath()`` methods on Element(Tree)?
``findall()`` is part of the original `ElementTree API`_. It supports a
From scoder at codespeak.net Mon May 29 10:48:30 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Mon, 29 May 2006 10:48:30 +0200 (CEST)
Subject: [Lxml-checkins] r27824 - in lxml/trunk: . src/lxml
Message-ID: <20060529084830.F020E10053@code0.codespeak.net>
Author: scoder
Date: Mon May 29 10:48:24 2006
New Revision: 27824
Modified:
lxml/trunk/CHANGES.txt
lxml/trunk/src/lxml/python.pxd
lxml/trunk/src/lxml/xmlerror.pxd
lxml/trunk/src/lxml/xmlerror.pxi
Log:
made error name lookup more robust, updating constants from xmlerror.h is easier now, also compiles faster
Modified: lxml/trunk/CHANGES.txt
==============================================================================
--- lxml/trunk/CHANGES.txt (original)
+++ lxml/trunk/CHANGES.txt Mon May 29 10:48:24 2006
@@ -22,6 +22,11 @@
Bugs fixed
----------
+* Unknown constants from newer libxml2 versions could raise exceptions in the
+ error handlers
+
+* lxml.etree compiles much faster
+
* On libxml2 <= 2.6.22, parsing strings with encoding declaration could fail
in certain cases
Modified: lxml/trunk/src/lxml/python.pxd
==============================================================================
--- lxml/trunk/src/lxml/python.pxd (original)
+++ lxml/trunk/src/lxml/python.pxd Mon May 29 10:48:24 2006
@@ -50,6 +50,8 @@
cdef int PySequence_Check(object instance)
cdef int PyType_Check(object instance)
+ cdef int PyObject_SetAttr(object o, object name, object value)
+
cdef void* PyMem_Malloc(size_t size)
cdef void PyMem_Free(void* p)
Modified: lxml/trunk/src/lxml/xmlerror.pxd
==============================================================================
--- lxml/trunk/src/lxml/xmlerror.pxd (original)
+++ lxml/trunk/src/lxml/xmlerror.pxd Mon May 29 10:48:24 2006
@@ -47,12 +47,12 @@
XML_FROM_CHECK = 24 # The error checking module
XML_FROM_WRITER = 25 # The xmlwriter module
XML_FROM_MODULE = 26 # The dynamically loaded module modu
-
+ XML_FROM_I18N = 27 # The module handling character conversion
ctypedef enum xmlParserErrors:
XML_ERR_OK = 0
- XML_ERR_INTERNAL_ERROR = 1
- XML_ERR_NO_MEMORY = 2
+ XML_ERR_INTERNAL_ERROR = 1 # 1
+ XML_ERR_NO_MEMORY = 2 # 2
XML_ERR_DOCUMENT_START = 3 # 3
XML_ERR_DOCUMENT_EMPTY = 4 # 4
XML_ERR_DOCUMENT_END = 5 # 5
@@ -152,10 +152,17 @@
XML_WAR_NS_URI = 99 # 99
XML_WAR_NS_URI_RELATIVE = 100 # 100
XML_ERR_MISSING_ENCODING = 101 # 101
+ XML_WAR_SPACE_VALUE = 102 # 102
+ XML_ERR_NOT_STANDALONE = 103 # 103
+ XML_ERR_ENTITY_PROCESSING = 104 # 104
+ XML_ERR_NOTATION_PROCESSING = 105 # 105
+ XML_WAR_NS_COLUMN = 106 # 106
+ XML_WAR_ENTITY_REDEFINED = 107 # 107
XML_NS_ERR_XML_NAMESPACE = 200
XML_NS_ERR_UNDEFINED_NAMESPACE = 201 # 201
XML_NS_ERR_QNAME = 202 # 202
XML_NS_ERR_ATTRIBUTE_REDEFINED = 203 # 203
+ XML_NS_ERR_EMPTY = 204 # 204
XML_DTD_ATTRIBUTE_DEFAULT = 500
XML_DTD_ATTRIBUTE_REDEFINED = 501 # 501
XML_DTD_ATTRIBUTE_VALUE = 502 # 502
@@ -610,6 +617,8 @@
XML_SCHEMAV_CVC_AU = 1874 # 1874
XML_SCHEMAV_CVC_TYPE_1 = 1875 # 1875
XML_SCHEMAV_CVC_TYPE_2 = 1876 # 1876
+ XML_SCHEMAV_CVC_IDC = 1877 # 1877
+ XML_SCHEMAV_CVC_WILDCARD = 1878 # 1878
XML_XPTR_UNKNOWN_SCHEME = 1900
XML_XPTR_CHILDSEQ_START = 1901 # 1901
XML_XPTR_EVAL_FAILED = 1902 # 1902
@@ -618,9 +627,12 @@
XML_C14N_REQUIRES_UTF8 = 1951 # 1951
XML_C14N_CREATE_STACK = 1952 # 1952
XML_C14N_INVALID_NODE = 1953 # 1953
+ XML_C14N_UNKNOW_NODE = 1954 # 1954
+ XML_C14N_RELATIVE_NAMESPACE = 1955 # 1955
XML_FTP_PASV_ANSWER = 2000
XML_FTP_EPSV_ANSWER = 2001 # 2001
XML_FTP_ACCNT = 2002 # 2002
+ XML_FTP_URL_SYNTAX = 2003 # 2003
XML_HTTP_URL_SYNTAX = 2020
XML_HTTP_USE_IP = 2021 # 2021
XML_HTTP_UNKNOWN_HOST = 2022 # 2022
@@ -704,6 +716,18 @@
XML_SCHEMAP_DERIVATION_OK_RESTRICTION_2_1_3 = 3077 # 3077
XML_SCHEMAP_AU_PROPS_CORRECT_2 = 3078 # 3078
XML_SCHEMAP_A_PROPS_CORRECT_2 = 3079 # 3079
+ XML_SCHEMAP_C_PROPS_CORRECT = 3080 # 3080
+ XML_SCHEMAP_SRC_REDEFINE = 3081 # 3081
+ XML_SCHEMAP_SRC_IMPORT = 3082 # 3082
+ XML_SCHEMAP_WARN_SKIP_SCHEMA = 3083 # 3083
+ XML_SCHEMAP_WARN_UNLOCATED_SCHEMA = 3084 # 3084
+ XML_SCHEMAP_WARN_ATTR_REDECL_PROH = 3085 # 3085
+ XML_SCHEMAP_WARN_ATTR_POINTLESS_PROH = 3086 # 3086
+ XML_SCHEMAP_AG_PROPS_CORRECT = 3087 # 3087
+ XML_SCHEMAP_COS_CT_EXTENDS_1_2 = 3088 # 3088
+ XML_SCHEMAP_AU_PROPS_CORRECT = 3089 # 3089
+ XML_SCHEMAP_A_PROPS_CORRECT_3 = 3090 # 3090
+ XML_SCHEMAP_COS_ALL_LIMITED = 3091 # 3091
XML_MODULE_OPEN = 4900 # 4900
XML_MODULE_CLOSE = 4901 # 4901
XML_CHECK_FOUND_ELEMENT = 5000
@@ -744,6 +768,10 @@
XML_CHECK_OUTSIDE_DICT = 5035 # 5035
XML_CHECK_WRONG_NAME = 5036 # 5036
XML_CHECK_NAME_NOT_NULL = 5037 # 5037
- XML_CHECK_ = 5038 # 5033
- XML_CHECK_X = 5039 # 503
-
+ XML_I18N_NO_NAME = 6000
+ XML_I18N_NO_HANDLER = 6001 # 6001
+ XML_I18N_EXCESS_HANDLER = 6002 # 6002
+ XML_I18N_CONV_FAILED = 6003 # 6003
+ XML_I18N_NO_OUTPUT = 6004 # 6004
+ XML_CHECK_ = 6005 # 5033
+ XML_CHECK_X = 6006 # 503
Modified: lxml/trunk/src/lxml/xmlerror.pxi
==============================================================================
--- lxml/trunk/src/lxml/xmlerror.pxi (original)
+++ lxml/trunk/src/lxml/xmlerror.pxi Mon May 29 10:48:24 2006
@@ -9,6 +9,8 @@
Note that this log is already bounded to a fixed size."""
__GLOBAL_ERROR_LOG.clear()
+# setup functions
+
cdef void _initThreadLogging():
"Setup logging for the current thread. Called from etree.initThread()."
# switch on line number reporting
@@ -58,15 +60,15 @@
property domain_name:
def __get__(self):
- return ErrorDomains._names[self.domain]
+ return ErrorDomains._getName(self.domain, "unknown")
property type_name:
def __get__(self):
- return ErrorTypes._names[self.type]
+ return ErrorTypes._getName(self.type, "unknown")
property level_name:
def __get__(self):
- return ErrorLevels._names[self.level]
+ return ErrorLevels._getName(self.level, "unknown")
cdef class _BaseErrorLog:
cdef readonly object last_error
@@ -374,757 +376,801 @@
xmlerror.xmlSetStructuredErrorFunc(NULL, _receiveError)
################################################################################
-## CONSTANTS FROM "xmlerror.pxd"
+## CONSTANTS FROM "xmlerror.h"
################################################################################
+cdef void __initErrorConstants():
+ find_constants = re.compile(r"\s*([a-zA-Z0-9_]+)\s*=\s*([0-9]+)").findall
+ for cls, constants, prefix in [(ErrorLevels, __ERROR_LEVELS, 'XML_ERR_'),
+ (ErrorDomains, __ERROR_DOMAINS, 'XML_FROM_'),
+ (ErrorTypes, __ERROR_TYPES, 'XML_')]:
+ prefix_len = len(prefix)
+ reverse_dict = {}
+ cls._names = reverse_dict
+ cls._getName = reverse_dict.get
+ for name, value in find_constants(constants):
+ if name[:prefix_len] == prefix and len(name) > prefix_len:
+ name = name[prefix_len:]
+ value = int(value)
+ python.PyObject_SetAttr(cls, name, value)
+ python.PyDict_SetItem(reverse_dict, value, name)
+
class ErrorLevels:
"Libxml2 error levels"
- _names = {}
- NONE = 0
- WARNING = 1 # A simple warning
- ERROR = 2 # A recoverable error
- FATAL = 3 # A fatal error
+
+cdef object __ERROR_LEVELS
+__ERROR_LEVELS = """
+ XML_ERR_NONE = 0
+ XML_ERR_WARNING = 1 : A simple warning
+ XML_ERR_ERROR = 2 : A recoverable error
+ XML_ERR_FATAL = 3 : A fatal error
+"""
class ErrorDomains:
"Libxml2 error domains"
- _names = {}
- NONE = 0
- PARSER = 1 # The XML parser
- TREE = 2 # The tree module
- NAMESPACE = 3 # The XML Namespace module
- DTD = 4 # The XML DTD validation with parser contex
- HTML = 5 # The HTML parser
- MEMORY = 6 # The memory allocator
- OUTPUT = 7 # The serialization code
- IO = 8 # The Input/Output stack
- FTP = 9 # The FTP module
- HTTP = 10 # The FTP module
- XINCLUDE = 11 # The XInclude processing
- XPATH = 12 # The XPath module
- XPOINTER = 13 # The XPointer module
- REGEXP = 14 # The regular expressions module
- DATATYPE = 15 # The W3C XML Schemas Datatype module
- SCHEMASP = 16 # The W3C XML Schemas parser module
- SCHEMASV = 17 # The W3C XML Schemas validation module
- RELAXNGP = 18 # The Relax-NG parser module
- RELAXNGV = 19 # The Relax-NG validator module
- CATALOG = 20 # The Catalog module
- C14N = 21 # The Canonicalization module
- XSLT = 22 # The XSLT engine from libxslt
- VALID = 23 # The XML DTD validation with valid context
- CHECK = 24 # The error checking module
- WRITER = 25 # The xmlwriter module
- MODULE = 26 # The dynamically loaded module modu
+
+cdef object __ERROR_DOMAINS
+__ERROR_DOMAINS = """
+ XML_FROM_NONE = 0
+ XML_FROM_PARSER = 1 : The XML parser
+ XML_FROM_TREE = 2 : The tree module
+ XML_FROM_NAMESPACE = 3 : The XML Namespace module
+ XML_FROM_DTD = 4 : The XML DTD validation with parser context
+ XML_FROM_HTML = 5 : The HTML parser
+ XML_FROM_MEMORY = 6 : The memory allocator
+ XML_FROM_OUTPUT = 7 : The serialization code
+ XML_FROM_IO = 8 : The Input/Output stack
+ XML_FROM_FTP = 9 : The FTP module
+ XML_FROM_HTTP = 10 : The HTTP module
+ XML_FROM_XINCLUDE = 11 : The XInclude processing
+ XML_FROM_XPATH = 12 : The XPath module
+ XML_FROM_XPOINTER = 13 : The XPointer module
+ XML_FROM_REGEXP = 14 : The regular expressions module
+ XML_FROM_DATATYPE = 15 : The W3C XML Schemas Datatype module
+ XML_FROM_SCHEMASP = 16 : The W3C XML Schemas parser module
+ XML_FROM_SCHEMASV = 17 : The W3C XML Schemas validation module
+ XML_FROM_RELAXNGP = 18 : The Relax-NG parser module
+ XML_FROM_RELAXNGV = 19 : The Relax-NG validator module
+ XML_FROM_CATALOG = 20 : The Catalog module
+ XML_FROM_C14N = 21 : The Canonicalization module
+ XML_FROM_XSLT = 22 : The XSLT engine from libxslt
+ XML_FROM_VALID = 23 : The XML DTD validation with valid context
+ XML_FROM_CHECK = 24 : The error checking module
+ XML_FROM_WRITER = 25 : The xmlwriter module
+ XML_FROM_MODULE = 26 : The dynamically loaded module modul
+ XML_FROM_I18N = 27 : The module handling character conversion
+"""
class ErrorTypes:
"Libxml2 error types"
- _names = {}
- ERR_OK = 0
- ERR_INTERNAL_ERROR = 1
- ERR_NO_MEMORY = 2
- ERR_DOCUMENT_START = 3 # 3
- ERR_DOCUMENT_EMPTY = 4 # 4
- ERR_DOCUMENT_END = 5 # 5
- ERR_INVALID_HEX_CHARREF = 6 # 6
- ERR_INVALID_DEC_CHARREF = 7 # 7
- ERR_INVALID_CHARREF = 8 # 8
- ERR_INVALID_CHAR = 9 # 9
- ERR_CHARREF_AT_EOF = 10 # 10
- ERR_CHARREF_IN_PROLOG = 11 # 11
- ERR_CHARREF_IN_EPILOG = 12 # 12
- ERR_CHARREF_IN_DTD = 13 # 13
- ERR_ENTITYREF_AT_EOF = 14 # 14
- ERR_ENTITYREF_IN_PROLOG = 15 # 15
- ERR_ENTITYREF_IN_EPILOG = 16 # 16
- ERR_ENTITYREF_IN_DTD = 17 # 17
- ERR_PEREF_AT_EOF = 18 # 18
- ERR_PEREF_IN_PROLOG = 19 # 19
- ERR_PEREF_IN_EPILOG = 20 # 20
- ERR_PEREF_IN_INT_SUBSET = 21 # 21
- ERR_ENTITYREF_NO_NAME = 22 # 22
- ERR_ENTITYREF_SEMICOL_MISSING = 23 # 23
- ERR_PEREF_NO_NAME = 24 # 24
- ERR_PEREF_SEMICOL_MISSING = 25 # 25
- ERR_UNDECLARED_ENTITY = 26 # 26
- WAR_UNDECLARED_ENTITY = 27 # 27
- ERR_UNPARSED_ENTITY = 28 # 28
- ERR_ENTITY_IS_EXTERNAL = 29 # 29
- ERR_ENTITY_IS_PARAMETER = 30 # 30
- ERR_UNKNOWN_ENCODING = 31 # 31
- ERR_UNSUPPORTED_ENCODING = 32 # 32
- ERR_STRING_NOT_STARTED = 33 # 33
- ERR_STRING_NOT_CLOSED = 34 # 34
- ERR_NS_DECL_ERROR = 35 # 35
- ERR_ENTITY_NOT_STARTED = 36 # 36
- ERR_ENTITY_NOT_FINISHED = 37 # 37
- ERR_LT_IN_ATTRIBUTE = 38 # 38
- ERR_ATTRIBUTE_NOT_STARTED = 39 # 39
- ERR_ATTRIBUTE_NOT_FINISHED = 40 # 40
- ERR_ATTRIBUTE_WITHOUT_VALUE = 41 # 41
- ERR_ATTRIBUTE_REDEFINED = 42 # 42
- ERR_LITERAL_NOT_STARTED = 43 # 43
- ERR_LITERAL_NOT_FINISHED = 44 # 44
- ERR_COMMENT_NOT_FINISHED = 45 # 45
- ERR_PI_NOT_STARTED = 46 # 46
- ERR_PI_NOT_FINISHED = 47 # 47
- ERR_NOTATION_NOT_STARTED = 48 # 48
- ERR_NOTATION_NOT_FINISHED = 49 # 49
- ERR_ATTLIST_NOT_STARTED = 50 # 50
- ERR_ATTLIST_NOT_FINISHED = 51 # 51
- ERR_MIXED_NOT_STARTED = 52 # 52
- ERR_MIXED_NOT_FINISHED = 53 # 53
- ERR_ELEMCONTENT_NOT_STARTED = 54 # 54
- ERR_ELEMCONTENT_NOT_FINISHED = 55 # 55
- ERR_XMLDECL_NOT_STARTED = 56 # 56
- ERR_XMLDECL_NOT_FINISHED = 57 # 57
- ERR_CONDSEC_NOT_STARTED = 58 # 58
- ERR_CONDSEC_NOT_FINISHED = 59 # 59
- ERR_EXT_SUBSET_NOT_FINISHED = 60 # 60
- ERR_DOCTYPE_NOT_FINISHED = 61 # 61
- ERR_MISPLACED_CDATA_END = 62 # 62
- ERR_CDATA_NOT_FINISHED = 63 # 63
- ERR_RESERVED_XML_NAME = 64 # 64
- ERR_SPACE_REQUIRED = 65 # 65
- ERR_SEPARATOR_REQUIRED = 66 # 66
- ERR_NMTOKEN_REQUIRED = 67 # 67
- ERR_NAME_REQUIRED = 68 # 68
- ERR_PCDATA_REQUIRED = 69 # 69
- ERR_URI_REQUIRED = 70 # 70
- ERR_PUBID_REQUIRED = 71 # 71
- ERR_LT_REQUIRED = 72 # 72
- ERR_GT_REQUIRED = 73 # 73
- ERR_LTSLASH_REQUIRED = 74 # 74
- ERR_EQUAL_REQUIRED = 75 # 75
- ERR_TAG_NAME_MISMATCH = 76 # 76
- ERR_TAG_NOT_FINISHED = 77 # 77
- ERR_STANDALONE_VALUE = 78 # 78
- ERR_ENCODING_NAME = 79 # 79
- ERR_HYPHEN_IN_COMMENT = 80 # 80
- ERR_INVALID_ENCODING = 81 # 81
- ERR_EXT_ENTITY_STANDALONE = 82 # 82
- ERR_CONDSEC_INVALID = 83 # 83
- ERR_VALUE_REQUIRED = 84 # 84
- ERR_NOT_WELL_BALANCED = 85 # 85
- ERR_EXTRA_CONTENT = 86 # 86
- ERR_ENTITY_CHAR_ERROR = 87 # 87
- ERR_ENTITY_PE_INTERNAL = 88 # 88
- ERR_ENTITY_LOOP = 89 # 89
- ERR_ENTITY_BOUNDARY = 90 # 90
- ERR_INVALID_URI = 91 # 91
- ERR_URI_FRAGMENT = 92 # 92
- WAR_CATALOG_PI = 93 # 93
- ERR_NO_DTD = 94 # 94
- ERR_CONDSEC_INVALID_KEYWORD = 95 # 95
- ERR_VERSION_MISSING = 96 # 96
- WAR_UNKNOWN_VERSION = 97 # 97
- WAR_LANG_VALUE = 98 # 98
- WAR_NS_URI = 99 # 99
- WAR_NS_URI_RELATIVE = 100 # 100
- ERR_MISSING_ENCODING = 101 # 101
- NS_ERR_XML_NAMESPACE = 200
- NS_ERR_UNDEFINED_NAMESPACE = 201 # 201
- NS_ERR_QNAME = 202 # 202
- NS_ERR_ATTRIBUTE_REDEFINED = 203 # 203
- DTD_ATTRIBUTE_DEFAULT = 500
- DTD_ATTRIBUTE_REDEFINED = 501 # 501
- DTD_ATTRIBUTE_VALUE = 502 # 502
- DTD_CONTENT_ERROR = 503 # 503
- DTD_CONTENT_MODEL = 504 # 504
- DTD_CONTENT_NOT_DETERMINIST = 505 # 505
- DTD_DIFFERENT_PREFIX = 506 # 506
- DTD_ELEM_DEFAULT_NAMESPACE = 507 # 507
- DTD_ELEM_NAMESPACE = 508 # 508
- DTD_ELEM_REDEFINED = 509 # 509
- DTD_EMPTY_NOTATION = 510 # 510
- DTD_ENTITY_TYPE = 511 # 511
- DTD_ID_FIXED = 512 # 512
- DTD_ID_REDEFINED = 513 # 513
- DTD_ID_SUBSET = 514 # 514
- DTD_INVALID_CHILD = 515 # 515
- DTD_INVALID_DEFAULT = 516 # 516
- DTD_LOAD_ERROR = 517 # 517
- DTD_MISSING_ATTRIBUTE = 518 # 518
- DTD_MIXED_CORRUPT = 519 # 519
- DTD_MULTIPLE_ID = 520 # 520
- DTD_NO_DOC = 521 # 521
- DTD_NO_DTD = 522 # 522
- DTD_NO_ELEM_NAME = 523 # 523
- DTD_NO_PREFIX = 524 # 524
- DTD_NO_ROOT = 525 # 525
- DTD_NOTATION_REDEFINED = 526 # 526
- DTD_NOTATION_VALUE = 527 # 527
- DTD_NOT_EMPTY = 528 # 528
- DTD_NOT_PCDATA = 529 # 529
- DTD_NOT_STANDALONE = 530 # 530
- DTD_ROOT_NAME = 531 # 531
- DTD_STANDALONE_WHITE_SPACE = 532 # 532
- DTD_UNKNOWN_ATTRIBUTE = 533 # 533
- DTD_UNKNOWN_ELEM = 534 # 534
- DTD_UNKNOWN_ENTITY = 535 # 535
- DTD_UNKNOWN_ID = 536 # 536
- DTD_UNKNOWN_NOTATION = 537 # 537
- DTD_STANDALONE_DEFAULTED = 538 # 538
- DTD_XMLID_VALUE = 539 # 539
- DTD_XMLID_TYPE = 540 # 540
- HTML_STRUCURE_ERROR = 800
- HTML_UNKNOWN_TAG = 801 # 801
- RNGP_ANYNAME_ATTR_ANCESTOR = 1000
- RNGP_ATTR_CONFLICT = 1001 # 1001
- RNGP_ATTRIBUTE_CHILDREN = 1002 # 1002
- RNGP_ATTRIBUTE_CONTENT = 1003 # 1003
- RNGP_ATTRIBUTE_EMPTY = 1004 # 1004
- RNGP_ATTRIBUTE_NOOP = 1005 # 1005
- RNGP_CHOICE_CONTENT = 1006 # 1006
- RNGP_CHOICE_EMPTY = 1007 # 1007
- RNGP_CREATE_FAILURE = 1008 # 1008
- RNGP_DATA_CONTENT = 1009 # 1009
- RNGP_DEF_CHOICE_AND_INTERLEAVE = 1010 # 1010
- RNGP_DEFINE_CREATE_FAILED = 1011 # 1011
- RNGP_DEFINE_EMPTY = 1012 # 1012
- RNGP_DEFINE_MISSING = 1013 # 1013
- RNGP_DEFINE_NAME_MISSING = 1014 # 1014
- RNGP_ELEM_CONTENT_EMPTY = 1015 # 1015
- RNGP_ELEM_CONTENT_ERROR = 1016 # 1016
- RNGP_ELEMENT_EMPTY = 1017 # 1017
- RNGP_ELEMENT_CONTENT = 1018 # 1018
- RNGP_ELEMENT_NAME = 1019 # 1019
- RNGP_ELEMENT_NO_CONTENT = 1020 # 1020
- RNGP_ELEM_TEXT_CONFLICT = 1021 # 1021
- RNGP_EMPTY = 1022 # 1022
- RNGP_EMPTY_CONSTRUCT = 1023 # 1023
- RNGP_EMPTY_CONTENT = 1024 # 1024
- RNGP_EMPTY_NOT_EMPTY = 1025 # 1025
- RNGP_ERROR_TYPE_LIB = 1026 # 1026
- RNGP_EXCEPT_EMPTY = 1027 # 1027
- RNGP_EXCEPT_MISSING = 1028 # 1028
- RNGP_EXCEPT_MULTIPLE = 1029 # 1029
- RNGP_EXCEPT_NO_CONTENT = 1030 # 1030
- RNGP_EXTERNALREF_EMTPY = 1031 # 1031
- RNGP_EXTERNAL_REF_FAILURE = 1032 # 1032
- RNGP_EXTERNALREF_RECURSE = 1033 # 1033
- RNGP_FORBIDDEN_ATTRIBUTE = 1034 # 1034
- RNGP_FOREIGN_ELEMENT = 1035 # 1035
- RNGP_GRAMMAR_CONTENT = 1036 # 1036
- RNGP_GRAMMAR_EMPTY = 1037 # 1037
- RNGP_GRAMMAR_MISSING = 1038 # 1038
- RNGP_GRAMMAR_NO_START = 1039 # 1039
- RNGP_GROUP_ATTR_CONFLICT = 1040 # 1040
- RNGP_HREF_ERROR = 1041 # 1041
- RNGP_INCLUDE_EMPTY = 1042 # 1042
- RNGP_INCLUDE_FAILURE = 1043 # 1043
- RNGP_INCLUDE_RECURSE = 1044 # 1044
- RNGP_INTERLEAVE_ADD = 1045 # 1045
- RNGP_INTERLEAVE_CREATE_FAILED = 1046 # 1046
- RNGP_INTERLEAVE_EMPTY = 1047 # 1047
- RNGP_INTERLEAVE_NO_CONTENT = 1048 # 1048
- RNGP_INVALID_DEFINE_NAME = 1049 # 1049
- RNGP_INVALID_URI = 1050 # 1050
- RNGP_INVALID_VALUE = 1051 # 1051
- RNGP_MISSING_HREF = 1052 # 1052
- RNGP_NAME_MISSING = 1053 # 1053
- RNGP_NEED_COMBINE = 1054 # 1054
- RNGP_NOTALLOWED_NOT_EMPTY = 1055 # 1055
- RNGP_NSNAME_ATTR_ANCESTOR = 1056 # 1056
- RNGP_NSNAME_NO_NS = 1057 # 1057
- RNGP_PARAM_FORBIDDEN = 1058 # 1058
- RNGP_PARAM_NAME_MISSING = 1059 # 1059
- RNGP_PARENTREF_CREATE_FAILED = 1060 # 1060
- RNGP_PARENTREF_NAME_INVALID = 1061 # 1061
- RNGP_PARENTREF_NO_NAME = 1062 # 1062
- RNGP_PARENTREF_NO_PARENT = 1063 # 1063
- RNGP_PARENTREF_NOT_EMPTY = 1064 # 1064
- RNGP_PARSE_ERROR = 1065 # 1065
- RNGP_PAT_ANYNAME_EXCEPT_ANYNAME = 1066 # 1066
- RNGP_PAT_ATTR_ATTR = 1067 # 1067
- RNGP_PAT_ATTR_ELEM = 1068 # 1068
- RNGP_PAT_DATA_EXCEPT_ATTR = 1069 # 1069
- RNGP_PAT_DATA_EXCEPT_ELEM = 1070 # 1070
- RNGP_PAT_DATA_EXCEPT_EMPTY = 1071 # 1071
- RNGP_PAT_DATA_EXCEPT_GROUP = 1072 # 1072
- RNGP_PAT_DATA_EXCEPT_INTERLEAVE = 1073 # 1073
- RNGP_PAT_DATA_EXCEPT_LIST = 1074 # 1074
- RNGP_PAT_DATA_EXCEPT_ONEMORE = 1075 # 1075
- RNGP_PAT_DATA_EXCEPT_REF = 1076 # 1076
- RNGP_PAT_DATA_EXCEPT_TEXT = 1077 # 1077
- RNGP_PAT_LIST_ATTR = 1078 # 1078
- RNGP_PAT_LIST_ELEM = 1079 # 1079
- RNGP_PAT_LIST_INTERLEAVE = 1080 # 1080
- RNGP_PAT_LIST_LIST = 1081 # 1081
- RNGP_PAT_LIST_REF = 1082 # 1082
- RNGP_PAT_LIST_TEXT = 1083 # 1083
- RNGP_PAT_NSNAME_EXCEPT_ANYNAME = 1084 # 1084
- RNGP_PAT_NSNAME_EXCEPT_NSNAME = 1085 # 1085
- RNGP_PAT_ONEMORE_GROUP_ATTR = 1086 # 1086
- RNGP_PAT_ONEMORE_INTERLEAVE_ATTR = 1087 # 1087
- RNGP_PAT_START_ATTR = 1088 # 1088
- RNGP_PAT_START_DATA = 1089 # 1089
- RNGP_PAT_START_EMPTY = 1090 # 1090
- RNGP_PAT_START_GROUP = 1091 # 1091
- RNGP_PAT_START_INTERLEAVE = 1092 # 1092
- RNGP_PAT_START_LIST = 1093 # 1093
- RNGP_PAT_START_ONEMORE = 1094 # 1094
- RNGP_PAT_START_TEXT = 1095 # 1095
- RNGP_PAT_START_VALUE = 1096 # 1096
- RNGP_PREFIX_UNDEFINED = 1097 # 1097
- RNGP_REF_CREATE_FAILED = 1098 # 1098
- RNGP_REF_CYCLE = 1099 # 1099
- RNGP_REF_NAME_INVALID = 1100 # 1100
- RNGP_REF_NO_DEF = 1101 # 1101
- RNGP_REF_NO_NAME = 1102 # 1102
- RNGP_REF_NOT_EMPTY = 1103 # 1103
- RNGP_START_CHOICE_AND_INTERLEAVE = 1104 # 1104
- RNGP_START_CONTENT = 1105 # 1105
- RNGP_START_EMPTY = 1106 # 1106
- RNGP_START_MISSING = 1107 # 1107
- RNGP_TEXT_EXPECTED = 1108 # 1108
- RNGP_TEXT_HAS_CHILD = 1109 # 1109
- RNGP_TYPE_MISSING = 1110 # 1110
- RNGP_TYPE_NOT_FOUND = 1111 # 1111
- RNGP_TYPE_VALUE = 1112 # 1112
- RNGP_UNKNOWN_ATTRIBUTE = 1113 # 1113
- RNGP_UNKNOWN_COMBINE = 1114 # 1114
- RNGP_UNKNOWN_CONSTRUCT = 1115 # 1115
- RNGP_UNKNOWN_TYPE_LIB = 1116 # 1116
- RNGP_URI_FRAGMENT = 1117 # 1117
- RNGP_URI_NOT_ABSOLUTE = 1118 # 1118
- RNGP_VALUE_EMPTY = 1119 # 1119
- RNGP_VALUE_NO_CONTENT = 1120 # 1120
- RNGP_XMLNS_NAME = 1121 # 1121
- RNGP_XML_NS = 1122 # 1122
- XPATH_EXPRESSION_OK = 1200
- XPATH_NUMBER_ERROR = 1201 # 1201
- XPATH_UNFINISHED_LITERAL_ERROR = 1202 # 1202
- XPATH_START_LITERAL_ERROR = 1203 # 1203
- XPATH_VARIABLE_REF_ERROR = 1204 # 1204
- XPATH_UNDEF_VARIABLE_ERROR = 1205 # 1205
- XPATH_INVALID_PREDICATE_ERROR = 1206 # 1206
- XPATH_EXPR_ERROR = 1207 # 1207
- XPATH_UNCLOSED_ERROR = 1208 # 1208
- XPATH_UNKNOWN_FUNC_ERROR = 1209 # 1209
- XPATH_INVALID_OPERAND = 1210 # 1210
- XPATH_INVALID_TYPE = 1211 # 1211
- XPATH_INVALID_ARITY = 1212 # 1212
- XPATH_INVALID_CTXT_SIZE = 1213 # 1213
- XPATH_INVALID_CTXT_POSITION = 1214 # 1214
- XPATH_MEMORY_ERROR = 1215 # 1215
- XPTR_SYNTAX_ERROR = 1216 # 1216
- XPTR_RESOURCE_ERROR = 1217 # 1217
- XPTR_SUB_RESOURCE_ERROR = 1218 # 1218
- XPATH_UNDEF_PREFIX_ERROR = 1219 # 1219
- XPATH_ENCODING_ERROR = 1220 # 1220
- XPATH_INVALID_CHAR_ERROR = 1221 # 1221
- TREE_INVALID_HEX = 1300
- TREE_INVALID_DEC = 1301 # 1301
- TREE_UNTERMINATED_ENTITY = 1302 # 1302
- SAVE_NOT_UTF8 = 1400
- SAVE_CHAR_INVALID = 1401 # 1401
- SAVE_NO_DOCTYPE = 1402 # 1402
- SAVE_UNKNOWN_ENCODING = 1403 # 1403
- REGEXP_COMPILE_ERROR = 1450
- IO_UNKNOWN = 1500
- IO_EACCES = 1501 # 1501
- IO_EAGAIN = 1502 # 1502
- IO_EBADF = 1503 # 1503
- IO_EBADMSG = 1504 # 1504
- IO_EBUSY = 1505 # 1505
- IO_ECANCELED = 1506 # 1506
- IO_ECHILD = 1507 # 1507
- IO_EDEADLK = 1508 # 1508
- IO_EDOM = 1509 # 1509
- IO_EEXIST = 1510 # 1510
- IO_EFAULT = 1511 # 1511
- IO_EFBIG = 1512 # 1512
- IO_EINPROGRESS = 1513 # 1513
- IO_EINTR = 1514 # 1514
- IO_EINVAL = 1515 # 1515
- IO_EIO = 1516 # 1516
- IO_EISDIR = 1517 # 1517
- IO_EMFILE = 1518 # 1518
- IO_EMLINK = 1519 # 1519
- IO_EMSGSIZE = 1520 # 1520
- IO_ENAMETOOLONG = 1521 # 1521
- IO_ENFILE = 1522 # 1522
- IO_ENODEV = 1523 # 1523
- IO_ENOENT = 1524 # 1524
- IO_ENOEXEC = 1525 # 1525
- IO_ENOLCK = 1526 # 1526
- IO_ENOMEM = 1527 # 1527
- IO_ENOSPC = 1528 # 1528
- IO_ENOSYS = 1529 # 1529
- IO_ENOTDIR = 1530 # 1530
- IO_ENOTEMPTY = 1531 # 1531
- IO_ENOTSUP = 1532 # 1532
- IO_ENOTTY = 1533 # 1533
- IO_ENXIO = 1534 # 1534
- IO_EPERM = 1535 # 1535
- IO_EPIPE = 1536 # 1536
- IO_ERANGE = 1537 # 1537
- IO_EROFS = 1538 # 1538
- IO_ESPIPE = 1539 # 1539
- IO_ESRCH = 1540 # 1540
- IO_ETIMEDOUT = 1541 # 1541
- IO_EXDEV = 1542 # 1542
- IO_NETWORK_ATTEMPT = 1543 # 1543
- IO_ENCODER = 1544 # 1544
- IO_FLUSH = 1545 # 1545
- IO_WRITE = 1546 # 1546
- IO_NO_INPUT = 1547 # 1547
- IO_BUFFER_FULL = 1548 # 1548
- IO_LOAD_ERROR = 1549 # 1549
- IO_ENOTSOCK = 1550 # 1550
- IO_EISCONN = 1551 # 1551
- IO_ECONNREFUSED = 1552 # 1552
- IO_ENETUNREACH = 1553 # 1553
- IO_EADDRINUSE = 1554 # 1554
- IO_EALREADY = 1555 # 1555
- IO_EAFNOSUPPORT = 1556 # 1556
- XINCLUDE_RECURSION = 1600
- XINCLUDE_PARSE_VALUE = 1601 # 1601
- XINCLUDE_ENTITY_DEF_MISMATCH = 1602 # 1602
- XINCLUDE_NO_HREF = 1603 # 1603
- XINCLUDE_NO_FALLBACK = 1604 # 1604
- XINCLUDE_HREF_URI = 1605 # 1605
- XINCLUDE_TEXT_FRAGMENT = 1606 # 1606
- XINCLUDE_TEXT_DOCUMENT = 1607 # 1607
- XINCLUDE_INVALID_CHAR = 1608 # 1608
- XINCLUDE_BUILD_FAILED = 1609 # 1609
- XINCLUDE_UNKNOWN_ENCODING = 1610 # 1610
- XINCLUDE_MULTIPLE_ROOT = 1611 # 1611
- XINCLUDE_XPTR_FAILED = 1612 # 1612
- XINCLUDE_XPTR_RESULT = 1613 # 1613
- XINCLUDE_INCLUDE_IN_INCLUDE = 1614 # 1614
- XINCLUDE_FALLBACKS_IN_INCLUDE = 1615 # 1615
- XINCLUDE_FALLBACK_NOT_IN_INCLUDE = 1616 # 1616
- XINCLUDE_DEPRECATED_NS = 1617 # 1617
- XINCLUDE_FRAGMENT_ID = 1618 # 1618
- CATALOG_MISSING_ATTR = 1650
- CATALOG_ENTRY_BROKEN = 1651 # 1651
- CATALOG_PREFER_VALUE = 1652 # 1652
- CATALOG_NOT_CATALOG = 1653 # 1653
- CATALOG_RECURSION = 1654 # 1654
- SCHEMAP_PREFIX_UNDEFINED = 1700
- SCHEMAP_ATTRFORMDEFAULT_VALUE = 1701 # 1701
- SCHEMAP_ATTRGRP_NONAME_NOREF = 1702 # 1702
- SCHEMAP_ATTR_NONAME_NOREF = 1703 # 1703
- SCHEMAP_COMPLEXTYPE_NONAME_NOREF = 1704 # 1704
- SCHEMAP_ELEMFORMDEFAULT_VALUE = 1705 # 1705
- SCHEMAP_ELEM_NONAME_NOREF = 1706 # 1706
- SCHEMAP_EXTENSION_NO_BASE = 1707 # 1707
- SCHEMAP_FACET_NO_VALUE = 1708 # 1708
- SCHEMAP_FAILED_BUILD_IMPORT = 1709 # 1709
- SCHEMAP_GROUP_NONAME_NOREF = 1710 # 1710
- SCHEMAP_IMPORT_NAMESPACE_NOT_URI = 1711 # 1711
- SCHEMAP_IMPORT_REDEFINE_NSNAME = 1712 # 1712
- SCHEMAP_IMPORT_SCHEMA_NOT_URI = 1713 # 1713
- SCHEMAP_INVALID_BOOLEAN = 1714 # 1714
- SCHEMAP_INVALID_ENUM = 1715 # 1715
- SCHEMAP_INVALID_FACET = 1716 # 1716
- SCHEMAP_INVALID_FACET_VALUE = 1717 # 1717
- SCHEMAP_INVALID_MAXOCCURS = 1718 # 1718
- SCHEMAP_INVALID_MINOCCURS = 1719 # 1719
- SCHEMAP_INVALID_REF_AND_SUBTYPE = 1720 # 1720
- SCHEMAP_INVALID_WHITE_SPACE = 1721 # 1721
- SCHEMAP_NOATTR_NOREF = 1722 # 1722
- SCHEMAP_NOTATION_NO_NAME = 1723 # 1723
- SCHEMAP_NOTYPE_NOREF = 1724 # 1724
- SCHEMAP_REF_AND_SUBTYPE = 1725 # 1725
- SCHEMAP_RESTRICTION_NONAME_NOREF = 1726 # 1726
- SCHEMAP_SIMPLETYPE_NONAME = 1727 # 1727
- SCHEMAP_TYPE_AND_SUBTYPE = 1728 # 1728
- SCHEMAP_UNKNOWN_ALL_CHILD = 1729 # 1729
- SCHEMAP_UNKNOWN_ANYATTRIBUTE_CHILD = 1730 # 1730
- SCHEMAP_UNKNOWN_ATTR_CHILD = 1731 # 1731
- SCHEMAP_UNKNOWN_ATTRGRP_CHILD = 1732 # 1732
- SCHEMAP_UNKNOWN_ATTRIBUTE_GROUP = 1733 # 1733
- SCHEMAP_UNKNOWN_BASE_TYPE = 1734 # 1734
- SCHEMAP_UNKNOWN_CHOICE_CHILD = 1735 # 1735
- SCHEMAP_UNKNOWN_COMPLEXCONTENT_CHILD = 1736 # 1736
- SCHEMAP_UNKNOWN_COMPLEXTYPE_CHILD = 1737 # 1737
- SCHEMAP_UNKNOWN_ELEM_CHILD = 1738 # 1738
- SCHEMAP_UNKNOWN_EXTENSION_CHILD = 1739 # 1739
- SCHEMAP_UNKNOWN_FACET_CHILD = 1740 # 1740
- SCHEMAP_UNKNOWN_FACET_TYPE = 1741 # 1741
- SCHEMAP_UNKNOWN_GROUP_CHILD = 1742 # 1742
- SCHEMAP_UNKNOWN_IMPORT_CHILD = 1743 # 1743
- SCHEMAP_UNKNOWN_LIST_CHILD = 1744 # 1744
- SCHEMAP_UNKNOWN_NOTATION_CHILD = 1745 # 1745
- SCHEMAP_UNKNOWN_PROCESSCONTENT_CHILD = 1746 # 1746
- SCHEMAP_UNKNOWN_REF = 1747 # 1747
- SCHEMAP_UNKNOWN_RESTRICTION_CHILD = 1748 # 1748
- SCHEMAP_UNKNOWN_SCHEMAS_CHILD = 1749 # 1749
- SCHEMAP_UNKNOWN_SEQUENCE_CHILD = 1750 # 1750
- SCHEMAP_UNKNOWN_SIMPLECONTENT_CHILD = 1751 # 1751
- SCHEMAP_UNKNOWN_SIMPLETYPE_CHILD = 1752 # 1752
- SCHEMAP_UNKNOWN_TYPE = 1753 # 1753
- SCHEMAP_UNKNOWN_UNION_CHILD = 1754 # 1754
- SCHEMAP_ELEM_DEFAULT_FIXED = 1755 # 1755
- SCHEMAP_REGEXP_INVALID = 1756 # 1756
- SCHEMAP_FAILED_LOAD = 1757 # 1757
- SCHEMAP_NOTHING_TO_PARSE = 1758 # 1758
- SCHEMAP_NOROOT = 1759 # 1759
- SCHEMAP_REDEFINED_GROUP = 1760 # 1760
- SCHEMAP_REDEFINED_TYPE = 1761 # 1761
- SCHEMAP_REDEFINED_ELEMENT = 1762 # 1762
- SCHEMAP_REDEFINED_ATTRGROUP = 1763 # 1763
- SCHEMAP_REDEFINED_ATTR = 1764 # 1764
- SCHEMAP_REDEFINED_NOTATION = 1765 # 1765
- SCHEMAP_FAILED_PARSE = 1766 # 1766
- SCHEMAP_UNKNOWN_PREFIX = 1767 # 1767
- SCHEMAP_DEF_AND_PREFIX = 1768 # 1768
- SCHEMAP_UNKNOWN_INCLUDE_CHILD = 1769 # 1769
- SCHEMAP_INCLUDE_SCHEMA_NOT_URI = 1770 # 1770
- SCHEMAP_INCLUDE_SCHEMA_NO_URI = 1771 # 1771
- SCHEMAP_NOT_SCHEMA = 1772 # 1772
- SCHEMAP_UNKNOWN_MEMBER_TYPE = 1773 # 1773
- SCHEMAP_INVALID_ATTR_USE = 1774 # 1774
- SCHEMAP_RECURSIVE = 1775 # 1775
- SCHEMAP_SUPERNUMEROUS_LIST_ITEM_TYPE = 1776 # 1776
- SCHEMAP_INVALID_ATTR_COMBINATION = 1777 # 1777
- SCHEMAP_INVALID_ATTR_INLINE_COMBINATION = 1778 # 1778
- SCHEMAP_MISSING_SIMPLETYPE_CHILD = 1779 # 1779
- SCHEMAP_INVALID_ATTR_NAME = 1780 # 1780
- SCHEMAP_REF_AND_CONTENT = 1781 # 1781
- SCHEMAP_CT_PROPS_CORRECT_1 = 1782 # 1782
- SCHEMAP_CT_PROPS_CORRECT_2 = 1783 # 1783
- SCHEMAP_CT_PROPS_CORRECT_3 = 1784 # 1784
- SCHEMAP_CT_PROPS_CORRECT_4 = 1785 # 1785
- SCHEMAP_CT_PROPS_CORRECT_5 = 1786 # 1786
- SCHEMAP_DERIVATION_OK_RESTRICTION_1 = 1787 # 1787
- SCHEMAP_DERIVATION_OK_RESTRICTION_2_1_1 = 1788 # 1788
- SCHEMAP_DERIVATION_OK_RESTRICTION_2_1_2 = 1789 # 1789
- SCHEMAP_DERIVATION_OK_RESTRICTION_2_2 = 1790 # 1790
- SCHEMAP_DERIVATION_OK_RESTRICTION_3 = 1791 # 1791
- SCHEMAP_WILDCARD_INVALID_NS_MEMBER = 1792 # 1792
- SCHEMAP_INTERSECTION_NOT_EXPRESSIBLE = 1793 # 1793
- SCHEMAP_UNION_NOT_EXPRESSIBLE = 1794 # 1794
- SCHEMAP_SRC_IMPORT_3_1 = 1795 # 1795
- SCHEMAP_SRC_IMPORT_3_2 = 1796 # 1796
- SCHEMAP_DERIVATION_OK_RESTRICTION_4_1 = 1797 # 1797
- SCHEMAP_DERIVATION_OK_RESTRICTION_4_2 = 1798 # 1798
- SCHEMAP_DERIVATION_OK_RESTRICTION_4_3 = 1799 # 1799
- SCHEMAP_COS_CT_EXTENDS_1_3 = 1800 # 1800
- SCHEMAV_NOROOT = 1801
- SCHEMAV_UNDECLAREDELEM = 1802 # 1802
- SCHEMAV_NOTTOPLEVEL = 1803 # 1803
- SCHEMAV_MISSING = 1804 # 1804
- SCHEMAV_WRONGELEM = 1805 # 1805
- SCHEMAV_NOTYPE = 1806 # 1806
- SCHEMAV_NOROLLBACK = 1807 # 1807
- SCHEMAV_ISABSTRACT = 1808 # 1808
- SCHEMAV_NOTEMPTY = 1809 # 1809
- SCHEMAV_ELEMCONT = 1810 # 1810
- SCHEMAV_HAVEDEFAULT = 1811 # 1811
- SCHEMAV_NOTNILLABLE = 1812 # 1812
- SCHEMAV_EXTRACONTENT = 1813 # 1813
- SCHEMAV_INVALIDATTR = 1814 # 1814
- SCHEMAV_INVALIDELEM = 1815 # 1815
- SCHEMAV_NOTDETERMINIST = 1816 # 1816
- SCHEMAV_CONSTRUCT = 1817 # 1817
- SCHEMAV_INTERNAL = 1818 # 1818
- SCHEMAV_NOTSIMPLE = 1819 # 1819
- SCHEMAV_ATTRUNKNOWN = 1820 # 1820
- SCHEMAV_ATTRINVALID = 1821 # 1821
- SCHEMAV_VALUE = 1822 # 1822
- SCHEMAV_FACET = 1823 # 1823
- SCHEMAV_CVC_DATATYPE_VALID_1_2_1 = 1824 # 1824
- SCHEMAV_CVC_DATATYPE_VALID_1_2_2 = 1825 # 1825
- SCHEMAV_CVC_DATATYPE_VALID_1_2_3 = 1826 # 1826
- SCHEMAV_CVC_TYPE_3_1_1 = 1827 # 1827
- SCHEMAV_CVC_TYPE_3_1_2 = 1828 # 1828
- SCHEMAV_CVC_FACET_VALID = 1829 # 1829
- SCHEMAV_CVC_LENGTH_VALID = 1830 # 1830
- SCHEMAV_CVC_MINLENGTH_VALID = 1831 # 1831
- SCHEMAV_CVC_MAXLENGTH_VALID = 1832 # 1832
- SCHEMAV_CVC_MININCLUSIVE_VALID = 1833 # 1833
- SCHEMAV_CVC_MAXINCLUSIVE_VALID = 1834 # 1834
- SCHEMAV_CVC_MINEXCLUSIVE_VALID = 1835 # 1835
- SCHEMAV_CVC_MAXEXCLUSIVE_VALID = 1836 # 1836
- SCHEMAV_CVC_TOTALDIGITS_VALID = 1837 # 1837
- SCHEMAV_CVC_FRACTIONDIGITS_VALID = 1838 # 1838
- SCHEMAV_CVC_PATTERN_VALID = 1839 # 1839
- SCHEMAV_CVC_ENUMERATION_VALID = 1840 # 1840
- SCHEMAV_CVC_COMPLEX_TYPE_2_1 = 1841 # 1841
- SCHEMAV_CVC_COMPLEX_TYPE_2_2 = 1842 # 1842
- SCHEMAV_CVC_COMPLEX_TYPE_2_3 = 1843 # 1843
- SCHEMAV_CVC_COMPLEX_TYPE_2_4 = 1844 # 1844
- SCHEMAV_CVC_ELT_1 = 1845 # 1845
- SCHEMAV_CVC_ELT_2 = 1846 # 1846
- SCHEMAV_CVC_ELT_3_1 = 1847 # 1847
- SCHEMAV_CVC_ELT_3_2_1 = 1848 # 1848
- SCHEMAV_CVC_ELT_3_2_2 = 1849 # 1849
- SCHEMAV_CVC_ELT_4_1 = 1850 # 1850
- SCHEMAV_CVC_ELT_4_2 = 1851 # 1851
- SCHEMAV_CVC_ELT_4_3 = 1852 # 1852
- SCHEMAV_CVC_ELT_5_1_1 = 1853 # 1853
- SCHEMAV_CVC_ELT_5_1_2 = 1854 # 1854
- SCHEMAV_CVC_ELT_5_2_1 = 1855 # 1855
- SCHEMAV_CVC_ELT_5_2_2_1 = 1856 # 1856
- SCHEMAV_CVC_ELT_5_2_2_2_1 = 1857 # 1857
- SCHEMAV_CVC_ELT_5_2_2_2_2 = 1858 # 1858
- SCHEMAV_CVC_ELT_6 = 1859 # 1859
- SCHEMAV_CVC_ELT_7 = 1860 # 1860
- SCHEMAV_CVC_ATTRIBUTE_1 = 1861 # 1861
- SCHEMAV_CVC_ATTRIBUTE_2 = 1862 # 1862
- SCHEMAV_CVC_ATTRIBUTE_3 = 1863 # 1863
- SCHEMAV_CVC_ATTRIBUTE_4 = 1864 # 1864
- SCHEMAV_CVC_COMPLEX_TYPE_3_1 = 1865 # 1865
- SCHEMAV_CVC_COMPLEX_TYPE_3_2_1 = 1866 # 1866
- SCHEMAV_CVC_COMPLEX_TYPE_3_2_2 = 1867 # 1867
- SCHEMAV_CVC_COMPLEX_TYPE_4 = 1868 # 1868
- SCHEMAV_CVC_COMPLEX_TYPE_5_1 = 1869 # 1869
- SCHEMAV_CVC_COMPLEX_TYPE_5_2 = 1870 # 1870
- SCHEMAV_ELEMENT_CONTENT = 1871 # 1871
- SCHEMAV_DOCUMENT_ELEMENT_MISSING = 1872 # 1872
- SCHEMAV_CVC_COMPLEX_TYPE_1 = 1873 # 1873
- SCHEMAV_CVC_AU = 1874 # 1874
- SCHEMAV_CVC_TYPE_1 = 1875 # 1875
- SCHEMAV_CVC_TYPE_2 = 1876 # 1876
- XPTR_UNKNOWN_SCHEME = 1900
- XPTR_CHILDSEQ_START = 1901 # 1901
- XPTR_EVAL_FAILED = 1902 # 1902
- XPTR_EXTRA_OBJECTS = 1903 # 1903
- C14N_CREATE_CTXT = 1950
- C14N_REQUIRES_UTF8 = 1951 # 1951
- C14N_CREATE_STACK = 1952 # 1952
- C14N_INVALID_NODE = 1953 # 1953
- FTP_PASV_ANSWER = 2000
- FTP_EPSV_ANSWER = 2001 # 2001
- FTP_ACCNT = 2002 # 2002
- HTTP_URL_SYNTAX = 2020
- HTTP_USE_IP = 2021 # 2021
- HTTP_UNKNOWN_HOST = 2022 # 2022
- SCHEMAP_SRC_SIMPLE_TYPE_1 = 3000
- SCHEMAP_SRC_SIMPLE_TYPE_2 = 3001 # 3001
- SCHEMAP_SRC_SIMPLE_TYPE_3 = 3002 # 3002
- SCHEMAP_SRC_SIMPLE_TYPE_4 = 3003 # 3003
- SCHEMAP_SRC_RESOLVE = 3004 # 3004
- SCHEMAP_SRC_RESTRICTION_BASE_OR_SIMPLETYPE = 3005 # 3005
- SCHEMAP_SRC_LIST_ITEMTYPE_OR_SIMPLETYPE = 3006 # 3006
- SCHEMAP_SRC_UNION_MEMBERTYPES_OR_SIMPLETYPES = 3007 # 3007
- SCHEMAP_ST_PROPS_CORRECT_1 = 3008 # 3008
- SCHEMAP_ST_PROPS_CORRECT_2 = 3009 # 3009
- SCHEMAP_ST_PROPS_CORRECT_3 = 3010 # 3010
- SCHEMAP_COS_ST_RESTRICTS_1_1 = 3011 # 3011
- SCHEMAP_COS_ST_RESTRICTS_1_2 = 3012 # 3012
- SCHEMAP_COS_ST_RESTRICTS_1_3_1 = 3013 # 3013
- SCHEMAP_COS_ST_RESTRICTS_1_3_2 = 3014 # 3014
- SCHEMAP_COS_ST_RESTRICTS_2_1 = 3015 # 3015
- SCHEMAP_COS_ST_RESTRICTS_2_3_1_1 = 3016 # 3016
- SCHEMAP_COS_ST_RESTRICTS_2_3_1_2 = 3017 # 3017
- SCHEMAP_COS_ST_RESTRICTS_2_3_2_1 = 3018 # 3018
- SCHEMAP_COS_ST_RESTRICTS_2_3_2_2 = 3019 # 3019
- SCHEMAP_COS_ST_RESTRICTS_2_3_2_3 = 3020 # 3020
- SCHEMAP_COS_ST_RESTRICTS_2_3_2_4 = 3021 # 3021
- SCHEMAP_COS_ST_RESTRICTS_2_3_2_5 = 3022 # 3022
- SCHEMAP_COS_ST_RESTRICTS_3_1 = 3023 # 3023
- SCHEMAP_COS_ST_RESTRICTS_3_3_1 = 3024 # 3024
- SCHEMAP_COS_ST_RESTRICTS_3_3_1_2 = 3025 # 3025
- SCHEMAP_COS_ST_RESTRICTS_3_3_2_2 = 3026 # 3026
- SCHEMAP_COS_ST_RESTRICTS_3_3_2_1 = 3027 # 3027
- SCHEMAP_COS_ST_RESTRICTS_3_3_2_3 = 3028 # 3028
- SCHEMAP_COS_ST_RESTRICTS_3_3_2_4 = 3029 # 3029
- SCHEMAP_COS_ST_RESTRICTS_3_3_2_5 = 3030 # 3030
- SCHEMAP_COS_ST_DERIVED_OK_2_1 = 3031 # 3031
- SCHEMAP_COS_ST_DERIVED_OK_2_2 = 3032 # 3032
- SCHEMAP_S4S_ELEM_NOT_ALLOWED = 3033 # 3033
- SCHEMAP_S4S_ELEM_MISSING = 3034 # 3034
- SCHEMAP_S4S_ATTR_NOT_ALLOWED = 3035 # 3035
- SCHEMAP_S4S_ATTR_MISSING = 3036 # 3036
- SCHEMAP_S4S_ATTR_INVALID_VALUE = 3037 # 3037
- SCHEMAP_SRC_ELEMENT_1 = 3038 # 3038
- SCHEMAP_SRC_ELEMENT_2_1 = 3039 # 3039
- SCHEMAP_SRC_ELEMENT_2_2 = 3040 # 3040
- SCHEMAP_SRC_ELEMENT_3 = 3041 # 3041
- SCHEMAP_P_PROPS_CORRECT_1 = 3042 # 3042
- SCHEMAP_P_PROPS_CORRECT_2_1 = 3043 # 3043
- SCHEMAP_P_PROPS_CORRECT_2_2 = 3044 # 3044
- SCHEMAP_E_PROPS_CORRECT_2 = 3045 # 3045
- SCHEMAP_E_PROPS_CORRECT_3 = 3046 # 3046
- SCHEMAP_E_PROPS_CORRECT_4 = 3047 # 3047
- SCHEMAP_E_PROPS_CORRECT_5 = 3048 # 3048
- SCHEMAP_E_PROPS_CORRECT_6 = 3049 # 3049
- SCHEMAP_SRC_INCLUDE = 3050 # 3050
- SCHEMAP_SRC_ATTRIBUTE_1 = 3051 # 3051
- SCHEMAP_SRC_ATTRIBUTE_2 = 3052 # 3052
- SCHEMAP_SRC_ATTRIBUTE_3_1 = 3053 # 3053
- SCHEMAP_SRC_ATTRIBUTE_3_2 = 3054 # 3054
- SCHEMAP_SRC_ATTRIBUTE_4 = 3055 # 3055
- SCHEMAP_NO_XMLNS = 3056 # 3056
- SCHEMAP_NO_XSI = 3057 # 3057
- SCHEMAP_COS_VALID_DEFAULT_1 = 3058 # 3058
- SCHEMAP_COS_VALID_DEFAULT_2_1 = 3059 # 3059
- SCHEMAP_COS_VALID_DEFAULT_2_2_1 = 3060 # 3060
- SCHEMAP_COS_VALID_DEFAULT_2_2_2 = 3061 # 3061
- SCHEMAP_CVC_SIMPLE_TYPE = 3062 # 3062
- SCHEMAP_COS_CT_EXTENDS_1_1 = 3063 # 3063
- SCHEMAP_SRC_IMPORT_1_1 = 3064 # 3064
- SCHEMAP_SRC_IMPORT_1_2 = 3065 # 3065
- SCHEMAP_SRC_IMPORT_2 = 3066 # 3066
- SCHEMAP_SRC_IMPORT_2_1 = 3067 # 3067
- SCHEMAP_SRC_IMPORT_2_2 = 3068 # 3068
- SCHEMAP_INTERNAL = 3069 # 3069 non-W3C
- SCHEMAP_NOT_DETERMINISTIC = 3070 # 3070 non-W3C
- SCHEMAP_SRC_ATTRIBUTE_GROUP_1 = 3071 # 3071
- SCHEMAP_SRC_ATTRIBUTE_GROUP_2 = 3072 # 3072
- SCHEMAP_SRC_ATTRIBUTE_GROUP_3 = 3073 # 3073
- SCHEMAP_MG_PROPS_CORRECT_1 = 3074 # 3074
- SCHEMAP_MG_PROPS_CORRECT_2 = 3075 # 3075
- SCHEMAP_SRC_CT_1 = 3076 # 3076
- SCHEMAP_DERIVATION_OK_RESTRICTION_2_1_3 = 3077 # 3077
- SCHEMAP_AU_PROPS_CORRECT_2 = 3078 # 3078
- SCHEMAP_A_PROPS_CORRECT_2 = 3079 # 3079
- MODULE_OPEN = 4900 # 4900
- MODULE_CLOSE = 4901 # 4901
- CHECK_FOUND_ELEMENT = 5000
- CHECK_FOUND_ATTRIBUTE = 5001 # 5001
- CHECK_FOUND_TEXT = 5002 # 5002
- CHECK_FOUND_CDATA = 5003 # 5003
- CHECK_FOUND_ENTITYREF = 5004 # 5004
- CHECK_FOUND_ENTITY = 5005 # 5005
- CHECK_FOUND_PI = 5006 # 5006
- CHECK_FOUND_COMMENT = 5007 # 5007
- CHECK_FOUND_DOCTYPE = 5008 # 5008
- CHECK_FOUND_FRAGMENT = 5009 # 5009
- CHECK_FOUND_NOTATION = 5010 # 5010
- CHECK_UNKNOWN_NODE = 5011 # 5011
- CHECK_ENTITY_TYPE = 5012 # 5012
- CHECK_NO_PARENT = 5013 # 5013
- CHECK_NO_DOC = 5014 # 5014
- CHECK_NO_NAME = 5015 # 5015
- CHECK_NO_ELEM = 5016 # 5016
- CHECK_WRONG_DOC = 5017 # 5017
- CHECK_NO_PREV = 5018 # 5018
- CHECK_WRONG_PREV = 5019 # 5019
- CHECK_NO_NEXT = 5020 # 5020
- CHECK_WRONG_NEXT = 5021 # 5021
- CHECK_NOT_DTD = 5022 # 5022
- CHECK_NOT_ATTR = 5023 # 5023
- CHECK_NOT_ATTR_DECL = 5024 # 5024
- CHECK_NOT_ELEM_DECL = 5025 # 5025
- CHECK_NOT_ENTITY_DECL = 5026 # 5026
- CHECK_NOT_NS_DECL = 5027 # 5027
- CHECK_NO_HREF = 5028 # 5028
- CHECK_WRONG_PARENT = 5029 # 5029
- CHECK_NS_SCOPE = 5030 # 5030
- CHECK_NS_ANCESTOR = 5031 # 5031
- CHECK_NOT_UTF8 = 5032 # 5032
- CHECK_NO_DICT = 5033 # 5033
- CHECK_NOT_NCNAME = 5034 # 5034
- CHECK_OUTSIDE_DICT = 5035 # 5035
- CHECK_WRONG_NAME = 5036 # 5036
- CHECK_NAME_NOT_NULL = 5037 # 5037
- CHECK_ = 5038 # 5033
- CHECK_X = 5039 # 503
-
-cdef object __names
-__names = ErrorLevels._names
-for name, value in vars(ErrorLevels).iteritems():
- python.PyDict_SetItem(__names, value, name)
-
-__names = ErrorDomains._names
-for name, value in vars(ErrorDomains).iteritems():
- python.PyDict_SetItem(__names, value, name)
-
-__names = ErrorTypes._names
-for name, value in vars(ErrorTypes).iteritems():
- python.PyDict_SetItem(__names, value, name)
+
+cdef object __ERROR_TYPES
+__ERROR_TYPES = """
+ XML_ERR_OK = 0
+ XML_ERR_INTERNAL_ERROR = 1 : 1
+ XML_ERR_NO_MEMORY = 2 : 2
+ XML_ERR_DOCUMENT_START = 3 : 3
+ XML_ERR_DOCUMENT_EMPTY = 4 : 4
+ XML_ERR_DOCUMENT_END = 5 : 5
+ XML_ERR_INVALID_HEX_CHARREF = 6 : 6
+ XML_ERR_INVALID_DEC_CHARREF = 7 : 7
+ XML_ERR_INVALID_CHARREF = 8 : 8
+ XML_ERR_INVALID_CHAR = 9 : 9
+ XML_ERR_CHARREF_AT_EOF = 10 : 10
+ XML_ERR_CHARREF_IN_PROLOG = 11 : 11
+ XML_ERR_CHARREF_IN_EPILOG = 12 : 12
+ XML_ERR_CHARREF_IN_DTD = 13 : 13
+ XML_ERR_ENTITYREF_AT_EOF = 14 : 14
+ XML_ERR_ENTITYREF_IN_PROLOG = 15 : 15
+ XML_ERR_ENTITYREF_IN_EPILOG = 16 : 16
+ XML_ERR_ENTITYREF_IN_DTD = 17 : 17
+ XML_ERR_PEREF_AT_EOF = 18 : 18
+ XML_ERR_PEREF_IN_PROLOG = 19 : 19
+ XML_ERR_PEREF_IN_EPILOG = 20 : 20
+ XML_ERR_PEREF_IN_INT_SUBSET = 21 : 21
+ XML_ERR_ENTITYREF_NO_NAME = 22 : 22
+ XML_ERR_ENTITYREF_SEMICOL_MISSING = 23 : 23
+ XML_ERR_PEREF_NO_NAME = 24 : 24
+ XML_ERR_PEREF_SEMICOL_MISSING = 25 : 25
+ XML_ERR_UNDECLARED_ENTITY = 26 : 26
+ XML_WAR_UNDECLARED_ENTITY = 27 : 27
+ XML_ERR_UNPARSED_ENTITY = 28 : 28
+ XML_ERR_ENTITY_IS_EXTERNAL = 29 : 29
+ XML_ERR_ENTITY_IS_PARAMETER = 30 : 30
+ XML_ERR_UNKNOWN_ENCODING = 31 : 31
+ XML_ERR_UNSUPPORTED_ENCODING = 32 : 32
+ XML_ERR_STRING_NOT_STARTED = 33 : 33
+ XML_ERR_STRING_NOT_CLOSED = 34 : 34
+ XML_ERR_NS_DECL_ERROR = 35 : 35
+ XML_ERR_ENTITY_NOT_STARTED = 36 : 36
+ XML_ERR_ENTITY_NOT_FINISHED = 37 : 37
+ XML_ERR_LT_IN_ATTRIBUTE = 38 : 38
+ XML_ERR_ATTRIBUTE_NOT_STARTED = 39 : 39
+ XML_ERR_ATTRIBUTE_NOT_FINISHED = 40 : 40
+ XML_ERR_ATTRIBUTE_WITHOUT_VALUE = 41 : 41
+ XML_ERR_ATTRIBUTE_REDEFINED = 42 : 42
+ XML_ERR_LITERAL_NOT_STARTED = 43 : 43
+ XML_ERR_LITERAL_NOT_FINISHED = 44 : 44
+ XML_ERR_COMMENT_NOT_FINISHED = 45 : 45
+ XML_ERR_PI_NOT_STARTED = 46 : 46
+ XML_ERR_PI_NOT_FINISHED = 47 : 47
+ XML_ERR_NOTATION_NOT_STARTED = 48 : 48
+ XML_ERR_NOTATION_NOT_FINISHED = 49 : 49
+ XML_ERR_ATTLIST_NOT_STARTED = 50 : 50
+ XML_ERR_ATTLIST_NOT_FINISHED = 51 : 51
+ XML_ERR_MIXED_NOT_STARTED = 52 : 52
+ XML_ERR_MIXED_NOT_FINISHED = 53 : 53
+ XML_ERR_ELEMCONTENT_NOT_STARTED = 54 : 54
+ XML_ERR_ELEMCONTENT_NOT_FINISHED = 55 : 55
+ XML_ERR_XMLDECL_NOT_STARTED = 56 : 56
+ XML_ERR_XMLDECL_NOT_FINISHED = 57 : 57
+ XML_ERR_CONDSEC_NOT_STARTED = 58 : 58
+ XML_ERR_CONDSEC_NOT_FINISHED = 59 : 59
+ XML_ERR_EXT_SUBSET_NOT_FINISHED = 60 : 60
+ XML_ERR_DOCTYPE_NOT_FINISHED = 61 : 61
+ XML_ERR_MISPLACED_CDATA_END = 62 : 62
+ XML_ERR_CDATA_NOT_FINISHED = 63 : 63
+ XML_ERR_RESERVED_XML_NAME = 64 : 64
+ XML_ERR_SPACE_REQUIRED = 65 : 65
+ XML_ERR_SEPARATOR_REQUIRED = 66 : 66
+ XML_ERR_NMTOKEN_REQUIRED = 67 : 67
+ XML_ERR_NAME_REQUIRED = 68 : 68
+ XML_ERR_PCDATA_REQUIRED = 69 : 69
+ XML_ERR_URI_REQUIRED = 70 : 70
+ XML_ERR_PUBID_REQUIRED = 71 : 71
+ XML_ERR_LT_REQUIRED = 72 : 72
+ XML_ERR_GT_REQUIRED = 73 : 73
+ XML_ERR_LTSLASH_REQUIRED = 74 : 74
+ XML_ERR_EQUAL_REQUIRED = 75 : 75
+ XML_ERR_TAG_NAME_MISMATCH = 76 : 76
+ XML_ERR_TAG_NOT_FINISHED = 77 : 77
+ XML_ERR_STANDALONE_VALUE = 78 : 78
+ XML_ERR_ENCODING_NAME = 79 : 79
+ XML_ERR_HYPHEN_IN_COMMENT = 80 : 80
+ XML_ERR_INVALID_ENCODING = 81 : 81
+ XML_ERR_EXT_ENTITY_STANDALONE = 82 : 82
+ XML_ERR_CONDSEC_INVALID = 83 : 83
+ XML_ERR_VALUE_REQUIRED = 84 : 84
+ XML_ERR_NOT_WELL_BALANCED = 85 : 85
+ XML_ERR_EXTRA_CONTENT = 86 : 86
+ XML_ERR_ENTITY_CHAR_ERROR = 87 : 87
+ XML_ERR_ENTITY_PE_INTERNAL = 88 : 88
+ XML_ERR_ENTITY_LOOP = 89 : 89
+ XML_ERR_ENTITY_BOUNDARY = 90 : 90
+ XML_ERR_INVALID_URI = 91 : 91
+ XML_ERR_URI_FRAGMENT = 92 : 92
+ XML_WAR_CATALOG_PI = 93 : 93
+ XML_ERR_NO_DTD = 94 : 94
+ XML_ERR_CONDSEC_INVALID_KEYWORD = 95 : 95
+ XML_ERR_VERSION_MISSING = 96 : 96
+ XML_WAR_UNKNOWN_VERSION = 97 : 97
+ XML_WAR_LANG_VALUE = 98 : 98
+ XML_WAR_NS_URI = 99 : 99
+ XML_WAR_NS_URI_RELATIVE = 100 : 100
+ XML_ERR_MISSING_ENCODING = 101 : 101
+ XML_WAR_SPACE_VALUE = 102 : 102
+ XML_ERR_NOT_STANDALONE = 103 : 103
+ XML_ERR_ENTITY_PROCESSING = 104 : 104
+ XML_ERR_NOTATION_PROCESSING = 105 : 105
+ XML_WAR_NS_COLUMN = 106 : 106
+ XML_WAR_ENTITY_REDEFINED = 107 : 107
+ XML_NS_ERR_XML_NAMESPACE = 200
+ XML_NS_ERR_UNDEFINED_NAMESPACE = 201 : 201
+ XML_NS_ERR_QNAME = 202 : 202
+ XML_NS_ERR_ATTRIBUTE_REDEFINED = 203 : 203
+ XML_NS_ERR_EMPTY = 204 : 204
+ XML_DTD_ATTRIBUTE_DEFAULT = 500
+ XML_DTD_ATTRIBUTE_REDEFINED = 501 : 501
+ XML_DTD_ATTRIBUTE_VALUE = 502 : 502
+ XML_DTD_CONTENT_ERROR = 503 : 503
+ XML_DTD_CONTENT_MODEL = 504 : 504
+ XML_DTD_CONTENT_NOT_DETERMINIST = 505 : 505
+ XML_DTD_DIFFERENT_PREFIX = 506 : 506
+ XML_DTD_ELEM_DEFAULT_NAMESPACE = 507 : 507
+ XML_DTD_ELEM_NAMESPACE = 508 : 508
+ XML_DTD_ELEM_REDEFINED = 509 : 509
+ XML_DTD_EMPTY_NOTATION = 510 : 510
+ XML_DTD_ENTITY_TYPE = 511 : 511
+ XML_DTD_ID_FIXED = 512 : 512
+ XML_DTD_ID_REDEFINED = 513 : 513
+ XML_DTD_ID_SUBSET = 514 : 514
+ XML_DTD_INVALID_CHILD = 515 : 515
+ XML_DTD_INVALID_DEFAULT = 516 : 516
+ XML_DTD_LOAD_ERROR = 517 : 517
+ XML_DTD_MISSING_ATTRIBUTE = 518 : 518
+ XML_DTD_MIXED_CORRUPT = 519 : 519
+ XML_DTD_MULTIPLE_ID = 520 : 520
+ XML_DTD_NO_DOC = 521 : 521
+ XML_DTD_NO_DTD = 522 : 522
+ XML_DTD_NO_ELEM_NAME = 523 : 523
+ XML_DTD_NO_PREFIX = 524 : 524
+ XML_DTD_NO_ROOT = 525 : 525
+ XML_DTD_NOTATION_REDEFINED = 526 : 526
+ XML_DTD_NOTATION_VALUE = 527 : 527
+ XML_DTD_NOT_EMPTY = 528 : 528
+ XML_DTD_NOT_PCDATA = 529 : 529
+ XML_DTD_NOT_STANDALONE = 530 : 530
+ XML_DTD_ROOT_NAME = 531 : 531
+ XML_DTD_STANDALONE_WHITE_SPACE = 532 : 532
+ XML_DTD_UNKNOWN_ATTRIBUTE = 533 : 533
+ XML_DTD_UNKNOWN_ELEM = 534 : 534
+ XML_DTD_UNKNOWN_ENTITY = 535 : 535
+ XML_DTD_UNKNOWN_ID = 536 : 536
+ XML_DTD_UNKNOWN_NOTATION = 537 : 537
+ XML_DTD_STANDALONE_DEFAULTED = 538 : 538
+ XML_DTD_XMLID_VALUE = 539 : 539
+ XML_DTD_XMLID_TYPE = 540 : 540
+ XML_HTML_STRUCURE_ERROR = 800
+ XML_HTML_UNKNOWN_TAG = 801 : 801
+ XML_RNGP_ANYNAME_ATTR_ANCESTOR = 1000
+ XML_RNGP_ATTR_CONFLICT = 1001 : 1001
+ XML_RNGP_ATTRIBUTE_CHILDREN = 1002 : 1002
+ XML_RNGP_ATTRIBUTE_CONTENT = 1003 : 1003
+ XML_RNGP_ATTRIBUTE_EMPTY = 1004 : 1004
+ XML_RNGP_ATTRIBUTE_NOOP = 1005 : 1005
+ XML_RNGP_CHOICE_CONTENT = 1006 : 1006
+ XML_RNGP_CHOICE_EMPTY = 1007 : 1007
+ XML_RNGP_CREATE_FAILURE = 1008 : 1008
+ XML_RNGP_DATA_CONTENT = 1009 : 1009
+ XML_RNGP_DEF_CHOICE_AND_INTERLEAVE = 1010 : 1010
+ XML_RNGP_DEFINE_CREATE_FAILED = 1011 : 1011
+ XML_RNGP_DEFINE_EMPTY = 1012 : 1012
+ XML_RNGP_DEFINE_MISSING = 1013 : 1013
+ XML_RNGP_DEFINE_NAME_MISSING = 1014 : 1014
+ XML_RNGP_ELEM_CONTENT_EMPTY = 1015 : 1015
+ XML_RNGP_ELEM_CONTENT_ERROR = 1016 : 1016
+ XML_RNGP_ELEMENT_EMPTY = 1017 : 1017
+ XML_RNGP_ELEMENT_CONTENT = 1018 : 1018
+ XML_RNGP_ELEMENT_NAME = 1019 : 1019
+ XML_RNGP_ELEMENT_NO_CONTENT = 1020 : 1020
+ XML_RNGP_ELEM_TEXT_CONFLICT = 1021 : 1021
+ XML_RNGP_EMPTY = 1022 : 1022
+ XML_RNGP_EMPTY_CONSTRUCT = 1023 : 1023
+ XML_RNGP_EMPTY_CONTENT = 1024 : 1024
+ XML_RNGP_EMPTY_NOT_EMPTY = 1025 : 1025
+ XML_RNGP_ERROR_TYPE_LIB = 1026 : 1026
+ XML_RNGP_EXCEPT_EMPTY = 1027 : 1027
+ XML_RNGP_EXCEPT_MISSING = 1028 : 1028
+ XML_RNGP_EXCEPT_MULTIPLE = 1029 : 1029
+ XML_RNGP_EXCEPT_NO_CONTENT = 1030 : 1030
+ XML_RNGP_EXTERNALREF_EMTPY = 1031 : 1031
+ XML_RNGP_EXTERNAL_REF_FAILURE = 1032 : 1032
+ XML_RNGP_EXTERNALREF_RECURSE = 1033 : 1033
+ XML_RNGP_FORBIDDEN_ATTRIBUTE = 1034 : 1034
+ XML_RNGP_FOREIGN_ELEMENT = 1035 : 1035
+ XML_RNGP_GRAMMAR_CONTENT = 1036 : 1036
+ XML_RNGP_GRAMMAR_EMPTY = 1037 : 1037
+ XML_RNGP_GRAMMAR_MISSING = 1038 : 1038
+ XML_RNGP_GRAMMAR_NO_START = 1039 : 1039
+ XML_RNGP_GROUP_ATTR_CONFLICT = 1040 : 1040
+ XML_RNGP_HREF_ERROR = 1041 : 1041
+ XML_RNGP_INCLUDE_EMPTY = 1042 : 1042
+ XML_RNGP_INCLUDE_FAILURE = 1043 : 1043
+ XML_RNGP_INCLUDE_RECURSE = 1044 : 1044
+ XML_RNGP_INTERLEAVE_ADD = 1045 : 1045
+ XML_RNGP_INTERLEAVE_CREATE_FAILED = 1046 : 1046
+ XML_RNGP_INTERLEAVE_EMPTY = 1047 : 1047
+ XML_RNGP_INTERLEAVE_NO_CONTENT = 1048 : 1048
+ XML_RNGP_INVALID_DEFINE_NAME = 1049 : 1049
+ XML_RNGP_INVALID_URI = 1050 : 1050
+ XML_RNGP_INVALID_VALUE = 1051 : 1051
+ XML_RNGP_MISSING_HREF = 1052 : 1052
+ XML_RNGP_NAME_MISSING = 1053 : 1053
+ XML_RNGP_NEED_COMBINE = 1054 : 1054
+ XML_RNGP_NOTALLOWED_NOT_EMPTY = 1055 : 1055
+ XML_RNGP_NSNAME_ATTR_ANCESTOR = 1056 : 1056
+ XML_RNGP_NSNAME_NO_NS = 1057 : 1057
+ XML_RNGP_PARAM_FORBIDDEN = 1058 : 1058
+ XML_RNGP_PARAM_NAME_MISSING = 1059 : 1059
+ XML_RNGP_PARENTREF_CREATE_FAILED = 1060 : 1060
+ XML_RNGP_PARENTREF_NAME_INVALID = 1061 : 1061
+ XML_RNGP_PARENTREF_NO_NAME = 1062 : 1062
+ XML_RNGP_PARENTREF_NO_PARENT = 1063 : 1063
+ XML_RNGP_PARENTREF_NOT_EMPTY = 1064 : 1064
+ XML_RNGP_PARSE_ERROR = 1065 : 1065
+ XML_RNGP_PAT_ANYNAME_EXCEPT_ANYNAME = 1066 : 1066
+ XML_RNGP_PAT_ATTR_ATTR = 1067 : 1067
+ XML_RNGP_PAT_ATTR_ELEM = 1068 : 1068
+ XML_RNGP_PAT_DATA_EXCEPT_ATTR = 1069 : 1069
+ XML_RNGP_PAT_DATA_EXCEPT_ELEM = 1070 : 1070
+ XML_RNGP_PAT_DATA_EXCEPT_EMPTY = 1071 : 1071
+ XML_RNGP_PAT_DATA_EXCEPT_GROUP = 1072 : 1072
+ XML_RNGP_PAT_DATA_EXCEPT_INTERLEAVE = 1073 : 1073
+ XML_RNGP_PAT_DATA_EXCEPT_LIST = 1074 : 1074
+ XML_RNGP_PAT_DATA_EXCEPT_ONEMORE = 1075 : 1075
+ XML_RNGP_PAT_DATA_EXCEPT_REF = 1076 : 1076
+ XML_RNGP_PAT_DATA_EXCEPT_TEXT = 1077 : 1077
+ XML_RNGP_PAT_LIST_ATTR = 1078 : 1078
+ XML_RNGP_PAT_LIST_ELEM = 1079 : 1079
+ XML_RNGP_PAT_LIST_INTERLEAVE = 1080 : 1080
+ XML_RNGP_PAT_LIST_LIST = 1081 : 1081
+ XML_RNGP_PAT_LIST_REF = 1082 : 1082
+ XML_RNGP_PAT_LIST_TEXT = 1083 : 1083
+ XML_RNGP_PAT_NSNAME_EXCEPT_ANYNAME = 1084 : 1084
+ XML_RNGP_PAT_NSNAME_EXCEPT_NSNAME = 1085 : 1085
+ XML_RNGP_PAT_ONEMORE_GROUP_ATTR = 1086 : 1086
+ XML_RNGP_PAT_ONEMORE_INTERLEAVE_ATTR = 1087 : 1087
+ XML_RNGP_PAT_START_ATTR = 1088 : 1088
+ XML_RNGP_PAT_START_DATA = 1089 : 1089
+ XML_RNGP_PAT_START_EMPTY = 1090 : 1090
+ XML_RNGP_PAT_START_GROUP = 1091 : 1091
+ XML_RNGP_PAT_START_INTERLEAVE = 1092 : 1092
+ XML_RNGP_PAT_START_LIST = 1093 : 1093
+ XML_RNGP_PAT_START_ONEMORE = 1094 : 1094
+ XML_RNGP_PAT_START_TEXT = 1095 : 1095
+ XML_RNGP_PAT_START_VALUE = 1096 : 1096
+ XML_RNGP_PREFIX_UNDEFINED = 1097 : 1097
+ XML_RNGP_REF_CREATE_FAILED = 1098 : 1098
+ XML_RNGP_REF_CYCLE = 1099 : 1099
+ XML_RNGP_REF_NAME_INVALID = 1100 : 1100
+ XML_RNGP_REF_NO_DEF = 1101 : 1101
+ XML_RNGP_REF_NO_NAME = 1102 : 1102
+ XML_RNGP_REF_NOT_EMPTY = 1103 : 1103
+ XML_RNGP_START_CHOICE_AND_INTERLEAVE = 1104 : 1104
+ XML_RNGP_START_CONTENT = 1105 : 1105
+ XML_RNGP_START_EMPTY = 1106 : 1106
+ XML_RNGP_START_MISSING = 1107 : 1107
+ XML_RNGP_TEXT_EXPECTED = 1108 : 1108
+ XML_RNGP_TEXT_HAS_CHILD = 1109 : 1109
+ XML_RNGP_TYPE_MISSING = 1110 : 1110
+ XML_RNGP_TYPE_NOT_FOUND = 1111 : 1111
+ XML_RNGP_TYPE_VALUE = 1112 : 1112
+ XML_RNGP_UNKNOWN_ATTRIBUTE = 1113 : 1113
+ XML_RNGP_UNKNOWN_COMBINE = 1114 : 1114
+ XML_RNGP_UNKNOWN_CONSTRUCT = 1115 : 1115
+ XML_RNGP_UNKNOWN_TYPE_LIB = 1116 : 1116
+ XML_RNGP_URI_FRAGMENT = 1117 : 1117
+ XML_RNGP_URI_NOT_ABSOLUTE = 1118 : 1118
+ XML_RNGP_VALUE_EMPTY = 1119 : 1119
+ XML_RNGP_VALUE_NO_CONTENT = 1120 : 1120
+ XML_RNGP_XMLNS_NAME = 1121 : 1121
+ XML_RNGP_XML_NS = 1122 : 1122
+ XML_XPATH_EXPRESSION_OK = 1200
+ XML_XPATH_NUMBER_ERROR = 1201 : 1201
+ XML_XPATH_UNFINISHED_LITERAL_ERROR = 1202 : 1202
+ XML_XPATH_START_LITERAL_ERROR = 1203 : 1203
+ XML_XPATH_VARIABLE_REF_ERROR = 1204 : 1204
+ XML_XPATH_UNDEF_VARIABLE_ERROR = 1205 : 1205
+ XML_XPATH_INVALID_PREDICATE_ERROR = 1206 : 1206
+ XML_XPATH_EXPR_ERROR = 1207 : 1207
+ XML_XPATH_UNCLOSED_ERROR = 1208 : 1208
+ XML_XPATH_UNKNOWN_FUNC_ERROR = 1209 : 1209
+ XML_XPATH_INVALID_OPERAND = 1210 : 1210
+ XML_XPATH_INVALID_TYPE = 1211 : 1211
+ XML_XPATH_INVALID_ARITY = 1212 : 1212
+ XML_XPATH_INVALID_CTXT_SIZE = 1213 : 1213
+ XML_XPATH_INVALID_CTXT_POSITION = 1214 : 1214
+ XML_XPATH_MEMORY_ERROR = 1215 : 1215
+ XML_XPTR_SYNTAX_ERROR = 1216 : 1216
+ XML_XPTR_RESOURCE_ERROR = 1217 : 1217
+ XML_XPTR_SUB_RESOURCE_ERROR = 1218 : 1218
+ XML_XPATH_UNDEF_PREFIX_ERROR = 1219 : 1219
+ XML_XPATH_ENCODING_ERROR = 1220 : 1220
+ XML_XPATH_INVALID_CHAR_ERROR = 1221 : 1221
+ XML_TREE_INVALID_HEX = 1300
+ XML_TREE_INVALID_DEC = 1301 : 1301
+ XML_TREE_UNTERMINATED_ENTITY = 1302 : 1302
+ XML_SAVE_NOT_UTF8 = 1400
+ XML_SAVE_CHAR_INVALID = 1401 : 1401
+ XML_SAVE_NO_DOCTYPE = 1402 : 1402
+ XML_SAVE_UNKNOWN_ENCODING = 1403 : 1403
+ XML_REGEXP_COMPILE_ERROR = 1450
+ XML_IO_UNKNOWN = 1500
+ XML_IO_EACCES = 1501 : 1501
+ XML_IO_EAGAIN = 1502 : 1502
+ XML_IO_EBADF = 1503 : 1503
+ XML_IO_EBADMSG = 1504 : 1504
+ XML_IO_EBUSY = 1505 : 1505
+ XML_IO_ECANCELED = 1506 : 1506
+ XML_IO_ECHILD = 1507 : 1507
+ XML_IO_EDEADLK = 1508 : 1508
+ XML_IO_EDOM = 1509 : 1509
+ XML_IO_EEXIST = 1510 : 1510
+ XML_IO_EFAULT = 1511 : 1511
+ XML_IO_EFBIG = 1512 : 1512
+ XML_IO_EINPROGRESS = 1513 : 1513
+ XML_IO_EINTR = 1514 : 1514
+ XML_IO_EINVAL = 1515 : 1515
+ XML_IO_EIO = 1516 : 1516
+ XML_IO_EISDIR = 1517 : 1517
+ XML_IO_EMFILE = 1518 : 1518
+ XML_IO_EMLINK = 1519 : 1519
+ XML_IO_EMSGSIZE = 1520 : 1520
+ XML_IO_ENAMETOOLONG = 1521 : 1521
+ XML_IO_ENFILE = 1522 : 1522
+ XML_IO_ENODEV = 1523 : 1523
+ XML_IO_ENOENT = 1524 : 1524
+ XML_IO_ENOEXEC = 1525 : 1525
+ XML_IO_ENOLCK = 1526 : 1526
+ XML_IO_ENOMEM = 1527 : 1527
+ XML_IO_ENOSPC = 1528 : 1528
+ XML_IO_ENOSYS = 1529 : 1529
+ XML_IO_ENOTDIR = 1530 : 1530
+ XML_IO_ENOTEMPTY = 1531 : 1531
+ XML_IO_ENOTSUP = 1532 : 1532
+ XML_IO_ENOTTY = 1533 : 1533
+ XML_IO_ENXIO = 1534 : 1534
+ XML_IO_EPERM = 1535 : 1535
+ XML_IO_EPIPE = 1536 : 1536
+ XML_IO_ERANGE = 1537 : 1537
+ XML_IO_EROFS = 1538 : 1538
+ XML_IO_ESPIPE = 1539 : 1539
+ XML_IO_ESRCH = 1540 : 1540
+ XML_IO_ETIMEDOUT = 1541 : 1541
+ XML_IO_EXDEV = 1542 : 1542
+ XML_IO_NETWORK_ATTEMPT = 1543 : 1543
+ XML_IO_ENCODER = 1544 : 1544
+ XML_IO_FLUSH = 1545 : 1545
+ XML_IO_WRITE = 1546 : 1546
+ XML_IO_NO_INPUT = 1547 : 1547
+ XML_IO_BUFFER_FULL = 1548 : 1548
+ XML_IO_LOAD_ERROR = 1549 : 1549
+ XML_IO_ENOTSOCK = 1550 : 1550
+ XML_IO_EISCONN = 1551 : 1551
+ XML_IO_ECONNREFUSED = 1552 : 1552
+ XML_IO_ENETUNREACH = 1553 : 1553
+ XML_IO_EADDRINUSE = 1554 : 1554
+ XML_IO_EALREADY = 1555 : 1555
+ XML_IO_EAFNOSUPPORT = 1556 : 1556
+ XML_XINCLUDE_RECURSION = 1600
+ XML_XINCLUDE_PARSE_VALUE = 1601 : 1601
+ XML_XINCLUDE_ENTITY_DEF_MISMATCH = 1602 : 1602
+ XML_XINCLUDE_NO_HREF = 1603 : 1603
+ XML_XINCLUDE_NO_FALLBACK = 1604 : 1604
+ XML_XINCLUDE_HREF_URI = 1605 : 1605
+ XML_XINCLUDE_TEXT_FRAGMENT = 1606 : 1606
+ XML_XINCLUDE_TEXT_DOCUMENT = 1607 : 1607
+ XML_XINCLUDE_INVALID_CHAR = 1608 : 1608
+ XML_XINCLUDE_BUILD_FAILED = 1609 : 1609
+ XML_XINCLUDE_UNKNOWN_ENCODING = 1610 : 1610
+ XML_XINCLUDE_MULTIPLE_ROOT = 1611 : 1611
+ XML_XINCLUDE_XPTR_FAILED = 1612 : 1612
+ XML_XINCLUDE_XPTR_RESULT = 1613 : 1613
+ XML_XINCLUDE_INCLUDE_IN_INCLUDE = 1614 : 1614
+ XML_XINCLUDE_FALLBACKS_IN_INCLUDE = 1615 : 1615
+ XML_XINCLUDE_FALLBACK_NOT_IN_INCLUDE = 1616 : 1616
+ XML_XINCLUDE_DEPRECATED_NS = 1617 : 1617
+ XML_XINCLUDE_FRAGMENT_ID = 1618 : 1618
+ XML_CATALOG_MISSING_ATTR = 1650
+ XML_CATALOG_ENTRY_BROKEN = 1651 : 1651
+ XML_CATALOG_PREFER_VALUE = 1652 : 1652
+ XML_CATALOG_NOT_CATALOG = 1653 : 1653
+ XML_CATALOG_RECURSION = 1654 : 1654
+ XML_SCHEMAP_PREFIX_UNDEFINED = 1700
+ XML_SCHEMAP_ATTRFORMDEFAULT_VALUE = 1701 : 1701
+ XML_SCHEMAP_ATTRGRP_NONAME_NOREF = 1702 : 1702
+ XML_SCHEMAP_ATTR_NONAME_NOREF = 1703 : 1703
+ XML_SCHEMAP_COMPLEXTYPE_NONAME_NOREF = 1704 : 1704
+ XML_SCHEMAP_ELEMFORMDEFAULT_VALUE = 1705 : 1705
+ XML_SCHEMAP_ELEM_NONAME_NOREF = 1706 : 1706
+ XML_SCHEMAP_EXTENSION_NO_BASE = 1707 : 1707
+ XML_SCHEMAP_FACET_NO_VALUE = 1708 : 1708
+ XML_SCHEMAP_FAILED_BUILD_IMPORT = 1709 : 1709
+ XML_SCHEMAP_GROUP_NONAME_NOREF = 1710 : 1710
+ XML_SCHEMAP_IMPORT_NAMESPACE_NOT_URI = 1711 : 1711
+ XML_SCHEMAP_IMPORT_REDEFINE_NSNAME = 1712 : 1712
+ XML_SCHEMAP_IMPORT_SCHEMA_NOT_URI = 1713 : 1713
+ XML_SCHEMAP_INVALID_BOOLEAN = 1714 : 1714
+ XML_SCHEMAP_INVALID_ENUM = 1715 : 1715
+ XML_SCHEMAP_INVALID_FACET = 1716 : 1716
+ XML_SCHEMAP_INVALID_FACET_VALUE = 1717 : 1717
+ XML_SCHEMAP_INVALID_MAXOCCURS = 1718 : 1718
+ XML_SCHEMAP_INVALID_MINOCCURS = 1719 : 1719
+ XML_SCHEMAP_INVALID_REF_AND_SUBTYPE = 1720 : 1720
+ XML_SCHEMAP_INVALID_WHITE_SPACE = 1721 : 1721
+ XML_SCHEMAP_NOATTR_NOREF = 1722 : 1722
+ XML_SCHEMAP_NOTATION_NO_NAME = 1723 : 1723
+ XML_SCHEMAP_NOTYPE_NOREF = 1724 : 1724
+ XML_SCHEMAP_REF_AND_SUBTYPE = 1725 : 1725
+ XML_SCHEMAP_RESTRICTION_NONAME_NOREF = 1726 : 1726
+ XML_SCHEMAP_SIMPLETYPE_NONAME = 1727 : 1727
+ XML_SCHEMAP_TYPE_AND_SUBTYPE = 1728 : 1728
+ XML_SCHEMAP_UNKNOWN_ALL_CHILD = 1729 : 1729
+ XML_SCHEMAP_UNKNOWN_ANYATTRIBUTE_CHILD = 1730 : 1730
+ XML_SCHEMAP_UNKNOWN_ATTR_CHILD = 1731 : 1731
+ XML_SCHEMAP_UNKNOWN_ATTRGRP_CHILD = 1732 : 1732
+ XML_SCHEMAP_UNKNOWN_ATTRIBUTE_GROUP = 1733 : 1733
+ XML_SCHEMAP_UNKNOWN_BASE_TYPE = 1734 : 1734
+ XML_SCHEMAP_UNKNOWN_CHOICE_CHILD = 1735 : 1735
+ XML_SCHEMAP_UNKNOWN_COMPLEXCONTENT_CHILD = 1736 : 1736
+ XML_SCHEMAP_UNKNOWN_COMPLEXTYPE_CHILD = 1737 : 1737
+ XML_SCHEMAP_UNKNOWN_ELEM_CHILD = 1738 : 1738
+ XML_SCHEMAP_UNKNOWN_EXTENSION_CHILD = 1739 : 1739
+ XML_SCHEMAP_UNKNOWN_FACET_CHILD = 1740 : 1740
+ XML_SCHEMAP_UNKNOWN_FACET_TYPE = 1741 : 1741
+ XML_SCHEMAP_UNKNOWN_GROUP_CHILD = 1742 : 1742
+ XML_SCHEMAP_UNKNOWN_IMPORT_CHILD = 1743 : 1743
+ XML_SCHEMAP_UNKNOWN_LIST_CHILD = 1744 : 1744
+ XML_SCHEMAP_UNKNOWN_NOTATION_CHILD = 1745 : 1745
+ XML_SCHEMAP_UNKNOWN_PROCESSCONTENT_CHILD = 1746 : 1746
+ XML_SCHEMAP_UNKNOWN_REF = 1747 : 1747
+ XML_SCHEMAP_UNKNOWN_RESTRICTION_CHILD = 1748 : 1748
+ XML_SCHEMAP_UNKNOWN_SCHEMAS_CHILD = 1749 : 1749
+ XML_SCHEMAP_UNKNOWN_SEQUENCE_CHILD = 1750 : 1750
+ XML_SCHEMAP_UNKNOWN_SIMPLECONTENT_CHILD = 1751 : 1751
+ XML_SCHEMAP_UNKNOWN_SIMPLETYPE_CHILD = 1752 : 1752
+ XML_SCHEMAP_UNKNOWN_TYPE = 1753 : 1753
+ XML_SCHEMAP_UNKNOWN_UNION_CHILD = 1754 : 1754
+ XML_SCHEMAP_ELEM_DEFAULT_FIXED = 1755 : 1755
+ XML_SCHEMAP_REGEXP_INVALID = 1756 : 1756
+ XML_SCHEMAP_FAILED_LOAD = 1757 : 1757
+ XML_SCHEMAP_NOTHING_TO_PARSE = 1758 : 1758
+ XML_SCHEMAP_NOROOT = 1759 : 1759
+ XML_SCHEMAP_REDEFINED_GROUP = 1760 : 1760
+ XML_SCHEMAP_REDEFINED_TYPE = 1761 : 1761
+ XML_SCHEMAP_REDEFINED_ELEMENT = 1762 : 1762
+ XML_SCHEMAP_REDEFINED_ATTRGROUP = 1763 : 1763
+ XML_SCHEMAP_REDEFINED_ATTR = 1764 : 1764
+ XML_SCHEMAP_REDEFINED_NOTATION = 1765 : 1765
+ XML_SCHEMAP_FAILED_PARSE = 1766 : 1766
+ XML_SCHEMAP_UNKNOWN_PREFIX = 1767 : 1767
+ XML_SCHEMAP_DEF_AND_PREFIX = 1768 : 1768
+ XML_SCHEMAP_UNKNOWN_INCLUDE_CHILD = 1769 : 1769
+ XML_SCHEMAP_INCLUDE_SCHEMA_NOT_URI = 1770 : 1770
+ XML_SCHEMAP_INCLUDE_SCHEMA_NO_URI = 1771 : 1771
+ XML_SCHEMAP_NOT_SCHEMA = 1772 : 1772
+ XML_SCHEMAP_UNKNOWN_MEMBER_TYPE = 1773 : 1773
+ XML_SCHEMAP_INVALID_ATTR_USE = 1774 : 1774
+ XML_SCHEMAP_RECURSIVE = 1775 : 1775
+ XML_SCHEMAP_SUPERNUMEROUS_LIST_ITEM_TYPE = 1776 : 1776
+ XML_SCHEMAP_INVALID_ATTR_COMBINATION = 1777 : 1777
+ XML_SCHEMAP_INVALID_ATTR_INLINE_COMBINATION = 1778 : 1778
+ XML_SCHEMAP_MISSING_SIMPLETYPE_CHILD = 1779 : 1779
+ XML_SCHEMAP_INVALID_ATTR_NAME = 1780 : 1780
+ XML_SCHEMAP_REF_AND_CONTENT = 1781 : 1781
+ XML_SCHEMAP_CT_PROPS_CORRECT_1 = 1782 : 1782
+ XML_SCHEMAP_CT_PROPS_CORRECT_2 = 1783 : 1783
+ XML_SCHEMAP_CT_PROPS_CORRECT_3 = 1784 : 1784
+ XML_SCHEMAP_CT_PROPS_CORRECT_4 = 1785 : 1785
+ XML_SCHEMAP_CT_PROPS_CORRECT_5 = 1786 : 1786
+ XML_SCHEMAP_DERIVATION_OK_RESTRICTION_1 = 1787 : 1787
+ XML_SCHEMAP_DERIVATION_OK_RESTRICTION_2_1_1 = 1788 : 1788
+ XML_SCHEMAP_DERIVATION_OK_RESTRICTION_2_1_2 = 1789 : 1789
+ XML_SCHEMAP_DERIVATION_OK_RESTRICTION_2_2 = 1790 : 1790
+ XML_SCHEMAP_DERIVATION_OK_RESTRICTION_3 = 1791 : 1791
+ XML_SCHEMAP_WILDCARD_INVALID_NS_MEMBER = 1792 : 1792
+ XML_SCHEMAP_INTERSECTION_NOT_EXPRESSIBLE = 1793 : 1793
+ XML_SCHEMAP_UNION_NOT_EXPRESSIBLE = 1794 : 1794
+ XML_SCHEMAP_SRC_IMPORT_3_1 = 1795 : 1795
+ XML_SCHEMAP_SRC_IMPORT_3_2 = 1796 : 1796
+ XML_SCHEMAP_DERIVATION_OK_RESTRICTION_4_1 = 1797 : 1797
+ XML_SCHEMAP_DERIVATION_OK_RESTRICTION_4_2 = 1798 : 1798
+ XML_SCHEMAP_DERIVATION_OK_RESTRICTION_4_3 = 1799 : 1799
+ XML_SCHEMAP_COS_CT_EXTENDS_1_3 = 1800 : 1800
+ XML_SCHEMAV_NOROOT = 1801
+ XML_SCHEMAV_UNDECLAREDELEM = 1802 : 1802
+ XML_SCHEMAV_NOTTOPLEVEL = 1803 : 1803
+ XML_SCHEMAV_MISSING = 1804 : 1804
+ XML_SCHEMAV_WRONGELEM = 1805 : 1805
+ XML_SCHEMAV_NOTYPE = 1806 : 1806
+ XML_SCHEMAV_NOROLLBACK = 1807 : 1807
+ XML_SCHEMAV_ISABSTRACT = 1808 : 1808
+ XML_SCHEMAV_NOTEMPTY = 1809 : 1809
+ XML_SCHEMAV_ELEMCONT = 1810 : 1810
+ XML_SCHEMAV_HAVEDEFAULT = 1811 : 1811
+ XML_SCHEMAV_NOTNILLABLE = 1812 : 1812
+ XML_SCHEMAV_EXTRACONTENT = 1813 : 1813
+ XML_SCHEMAV_INVALIDATTR = 1814 : 1814
+ XML_SCHEMAV_INVALIDELEM = 1815 : 1815
+ XML_SCHEMAV_NOTDETERMINIST = 1816 : 1816
+ XML_SCHEMAV_CONSTRUCT = 1817 : 1817
+ XML_SCHEMAV_INTERNAL = 1818 : 1818
+ XML_SCHEMAV_NOTSIMPLE = 1819 : 1819
+ XML_SCHEMAV_ATTRUNKNOWN = 1820 : 1820
+ XML_SCHEMAV_ATTRINVALID = 1821 : 1821
+ XML_SCHEMAV_VALUE = 1822 : 1822
+ XML_SCHEMAV_FACET = 1823 : 1823
+ XML_SCHEMAV_CVC_DATATYPE_VALID_1_2_1 = 1824 : 1824
+ XML_SCHEMAV_CVC_DATATYPE_VALID_1_2_2 = 1825 : 1825
+ XML_SCHEMAV_CVC_DATATYPE_VALID_1_2_3 = 1826 : 1826
+ XML_SCHEMAV_CVC_TYPE_3_1_1 = 1827 : 1827
+ XML_SCHEMAV_CVC_TYPE_3_1_2 = 1828 : 1828
+ XML_SCHEMAV_CVC_FACET_VALID = 1829 : 1829
+ XML_SCHEMAV_CVC_LENGTH_VALID = 1830 : 1830
+ XML_SCHEMAV_CVC_MINLENGTH_VALID = 1831 : 1831
+ XML_SCHEMAV_CVC_MAXLENGTH_VALID = 1832 : 1832
+ XML_SCHEMAV_CVC_MININCLUSIVE_VALID = 1833 : 1833
+ XML_SCHEMAV_CVC_MAXINCLUSIVE_VALID = 1834 : 1834
+ XML_SCHEMAV_CVC_MINEXCLUSIVE_VALID = 1835 : 1835
+ XML_SCHEMAV_CVC_MAXEXCLUSIVE_VALID = 1836 : 1836
+ XML_SCHEMAV_CVC_TOTALDIGITS_VALID = 1837 : 1837
+ XML_SCHEMAV_CVC_FRACTIONDIGITS_VALID = 1838 : 1838
+ XML_SCHEMAV_CVC_PATTERN_VALID = 1839 : 1839
+ XML_SCHEMAV_CVC_ENUMERATION_VALID = 1840 : 1840
+ XML_SCHEMAV_CVC_COMPLEX_TYPE_2_1 = 1841 : 1841
+ XML_SCHEMAV_CVC_COMPLEX_TYPE_2_2 = 1842 : 1842
+ XML_SCHEMAV_CVC_COMPLEX_TYPE_2_3 = 1843 : 1843
+ XML_SCHEMAV_CVC_COMPLEX_TYPE_2_4 = 1844 : 1844
+ XML_SCHEMAV_CVC_ELT_1 = 1845 : 1845
+ XML_SCHEMAV_CVC_ELT_2 = 1846 : 1846
+ XML_SCHEMAV_CVC_ELT_3_1 = 1847 : 1847
+ XML_SCHEMAV_CVC_ELT_3_2_1 = 1848 : 1848
+ XML_SCHEMAV_CVC_ELT_3_2_2 = 1849 : 1849
+ XML_SCHEMAV_CVC_ELT_4_1 = 1850 : 1850
+ XML_SCHEMAV_CVC_ELT_4_2 = 1851 : 1851
+ XML_SCHEMAV_CVC_ELT_4_3 = 1852 : 1852
+ XML_SCHEMAV_CVC_ELT_5_1_1 = 1853 : 1853
+ XML_SCHEMAV_CVC_ELT_5_1_2 = 1854 : 1854
+ XML_SCHEMAV_CVC_ELT_5_2_1 = 1855 : 1855
+ XML_SCHEMAV_CVC_ELT_5_2_2_1 = 1856 : 1856
+ XML_SCHEMAV_CVC_ELT_5_2_2_2_1 = 1857 : 1857
+ XML_SCHEMAV_CVC_ELT_5_2_2_2_2 = 1858 : 1858
+ XML_SCHEMAV_CVC_ELT_6 = 1859 : 1859
+ XML_SCHEMAV_CVC_ELT_7 = 1860 : 1860
+ XML_SCHEMAV_CVC_ATTRIBUTE_1 = 1861 : 1861
+ XML_SCHEMAV_CVC_ATTRIBUTE_2 = 1862 : 1862
+ XML_SCHEMAV_CVC_ATTRIBUTE_3 = 1863 : 1863
+ XML_SCHEMAV_CVC_ATTRIBUTE_4 = 1864 : 1864
+ XML_SCHEMAV_CVC_COMPLEX_TYPE_3_1 = 1865 : 1865
+ XML_SCHEMAV_CVC_COMPLEX_TYPE_3_2_1 = 1866 : 1866
+ XML_SCHEMAV_CVC_COMPLEX_TYPE_3_2_2 = 1867 : 1867
+ XML_SCHEMAV_CVC_COMPLEX_TYPE_4 = 1868 : 1868
+ XML_SCHEMAV_CVC_COMPLEX_TYPE_5_1 = 1869 : 1869
+ XML_SCHEMAV_CVC_COMPLEX_TYPE_5_2 = 1870 : 1870
+ XML_SCHEMAV_ELEMENT_CONTENT = 1871 : 1871
+ XML_SCHEMAV_DOCUMENT_ELEMENT_MISSING = 1872 : 1872
+ XML_SCHEMAV_CVC_COMPLEX_TYPE_1 = 1873 : 1873
+ XML_SCHEMAV_CVC_AU = 1874 : 1874
+ XML_SCHEMAV_CVC_TYPE_1 = 1875 : 1875
+ XML_SCHEMAV_CVC_TYPE_2 = 1876 : 1876
+ XML_SCHEMAV_CVC_IDC = 1877 : 1877
+ XML_SCHEMAV_CVC_WILDCARD = 1878 : 1878
+ XML_XPTR_UNKNOWN_SCHEME = 1900
+ XML_XPTR_CHILDSEQ_START = 1901 : 1901
+ XML_XPTR_EVAL_FAILED = 1902 : 1902
+ XML_XPTR_EXTRA_OBJECTS = 1903 : 1903
+ XML_C14N_CREATE_CTXT = 1950
+ XML_C14N_REQUIRES_UTF8 = 1951 : 1951
+ XML_C14N_CREATE_STACK = 1952 : 1952
+ XML_C14N_INVALID_NODE = 1953 : 1953
+ XML_C14N_UNKNOW_NODE = 1954 : 1954
+ XML_C14N_RELATIVE_NAMESPACE = 1955 : 1955
+ XML_FTP_PASV_ANSWER = 2000
+ XML_FTP_EPSV_ANSWER = 2001 : 2001
+ XML_FTP_ACCNT = 2002 : 2002
+ XML_FTP_URL_SYNTAX = 2003 : 2003
+ XML_HTTP_URL_SYNTAX = 2020
+ XML_HTTP_USE_IP = 2021 : 2021
+ XML_HTTP_UNKNOWN_HOST = 2022 : 2022
+ XML_SCHEMAP_SRC_SIMPLE_TYPE_1 = 3000
+ XML_SCHEMAP_SRC_SIMPLE_TYPE_2 = 3001 : 3001
+ XML_SCHEMAP_SRC_SIMPLE_TYPE_3 = 3002 : 3002
+ XML_SCHEMAP_SRC_SIMPLE_TYPE_4 = 3003 : 3003
+ XML_SCHEMAP_SRC_RESOLVE = 3004 : 3004
+ XML_SCHEMAP_SRC_RESTRICTION_BASE_OR_SIMPLETYPE = 3005 : 3005
+ XML_SCHEMAP_SRC_LIST_ITEMTYPE_OR_SIMPLETYPE = 3006 : 3006
+ XML_SCHEMAP_SRC_UNION_MEMBERTYPES_OR_SIMPLETYPES = 3007 : 3007
+ XML_SCHEMAP_ST_PROPS_CORRECT_1 = 3008 : 3008
+ XML_SCHEMAP_ST_PROPS_CORRECT_2 = 3009 : 3009
+ XML_SCHEMAP_ST_PROPS_CORRECT_3 = 3010 : 3010
+ XML_SCHEMAP_COS_ST_RESTRICTS_1_1 = 3011 : 3011
+ XML_SCHEMAP_COS_ST_RESTRICTS_1_2 = 3012 : 3012
+ XML_SCHEMAP_COS_ST_RESTRICTS_1_3_1 = 3013 : 3013
+ XML_SCHEMAP_COS_ST_RESTRICTS_1_3_2 = 3014 : 3014
+ XML_SCHEMAP_COS_ST_RESTRICTS_2_1 = 3015 : 3015
+ XML_SCHEMAP_COS_ST_RESTRICTS_2_3_1_1 = 3016 : 3016
+ XML_SCHEMAP_COS_ST_RESTRICTS_2_3_1_2 = 3017 : 3017
+ XML_SCHEMAP_COS_ST_RESTRICTS_2_3_2_1 = 3018 : 3018
+ XML_SCHEMAP_COS_ST_RESTRICTS_2_3_2_2 = 3019 : 3019
+ XML_SCHEMAP_COS_ST_RESTRICTS_2_3_2_3 = 3020 : 3020
+ XML_SCHEMAP_COS_ST_RESTRICTS_2_3_2_4 = 3021 : 3021
+ XML_SCHEMAP_COS_ST_RESTRICTS_2_3_2_5 = 3022 : 3022
+ XML_SCHEMAP_COS_ST_RESTRICTS_3_1 = 3023 : 3023
+ XML_SCHEMAP_COS_ST_RESTRICTS_3_3_1 = 3024 : 3024
+ XML_SCHEMAP_COS_ST_RESTRICTS_3_3_1_2 = 3025 : 3025
+ XML_SCHEMAP_COS_ST_RESTRICTS_3_3_2_2 = 3026 : 3026
+ XML_SCHEMAP_COS_ST_RESTRICTS_3_3_2_1 = 3027 : 3027
+ XML_SCHEMAP_COS_ST_RESTRICTS_3_3_2_3 = 3028 : 3028
+ XML_SCHEMAP_COS_ST_RESTRICTS_3_3_2_4 = 3029 : 3029
+ XML_SCHEMAP_COS_ST_RESTRICTS_3_3_2_5 = 3030 : 3030
+ XML_SCHEMAP_COS_ST_DERIVED_OK_2_1 = 3031 : 3031
+ XML_SCHEMAP_COS_ST_DERIVED_OK_2_2 = 3032 : 3032
+ XML_SCHEMAP_S4S_ELEM_NOT_ALLOWED = 3033 : 3033
+ XML_SCHEMAP_S4S_ELEM_MISSING = 3034 : 3034
+ XML_SCHEMAP_S4S_ATTR_NOT_ALLOWED = 3035 : 3035
+ XML_SCHEMAP_S4S_ATTR_MISSING = 3036 : 3036
+ XML_SCHEMAP_S4S_ATTR_INVALID_VALUE = 3037 : 3037
+ XML_SCHEMAP_SRC_ELEMENT_1 = 3038 : 3038
+ XML_SCHEMAP_SRC_ELEMENT_2_1 = 3039 : 3039
+ XML_SCHEMAP_SRC_ELEMENT_2_2 = 3040 : 3040
+ XML_SCHEMAP_SRC_ELEMENT_3 = 3041 : 3041
+ XML_SCHEMAP_P_PROPS_CORRECT_1 = 3042 : 3042
+ XML_SCHEMAP_P_PROPS_CORRECT_2_1 = 3043 : 3043
+ XML_SCHEMAP_P_PROPS_CORRECT_2_2 = 3044 : 3044
+ XML_SCHEMAP_E_PROPS_CORRECT_2 = 3045 : 3045
+ XML_SCHEMAP_E_PROPS_CORRECT_3 = 3046 : 3046
+ XML_SCHEMAP_E_PROPS_CORRECT_4 = 3047 : 3047
+ XML_SCHEMAP_E_PROPS_CORRECT_5 = 3048 : 3048
+ XML_SCHEMAP_E_PROPS_CORRECT_6 = 3049 : 3049
+ XML_SCHEMAP_SRC_INCLUDE = 3050 : 3050
+ XML_SCHEMAP_SRC_ATTRIBUTE_1 = 3051 : 3051
+ XML_SCHEMAP_SRC_ATTRIBUTE_2 = 3052 : 3052
+ XML_SCHEMAP_SRC_ATTRIBUTE_3_1 = 3053 : 3053
+ XML_SCHEMAP_SRC_ATTRIBUTE_3_2 = 3054 : 3054
+ XML_SCHEMAP_SRC_ATTRIBUTE_4 = 3055 : 3055
+ XML_SCHEMAP_NO_XMLNS = 3056 : 3056
+ XML_SCHEMAP_NO_XSI = 3057 : 3057
+ XML_SCHEMAP_COS_VALID_DEFAULT_1 = 3058 : 3058
+ XML_SCHEMAP_COS_VALID_DEFAULT_2_1 = 3059 : 3059
+ XML_SCHEMAP_COS_VALID_DEFAULT_2_2_1 = 3060 : 3060
+ XML_SCHEMAP_COS_VALID_DEFAULT_2_2_2 = 3061 : 3061
+ XML_SCHEMAP_CVC_SIMPLE_TYPE = 3062 : 3062
+ XML_SCHEMAP_COS_CT_EXTENDS_1_1 = 3063 : 3063
+ XML_SCHEMAP_SRC_IMPORT_1_1 = 3064 : 3064
+ XML_SCHEMAP_SRC_IMPORT_1_2 = 3065 : 3065
+ XML_SCHEMAP_SRC_IMPORT_2 = 3066 : 3066
+ XML_SCHEMAP_SRC_IMPORT_2_1 = 3067 : 3067
+ XML_SCHEMAP_SRC_IMPORT_2_2 = 3068 : 3068
+ XML_SCHEMAP_INTERNAL = 3069 : 3069 non-W3C
+ XML_SCHEMAP_NOT_DETERMINISTIC = 3070 : 3070 non-W3C
+ XML_SCHEMAP_SRC_ATTRIBUTE_GROUP_1 = 3071 : 3071
+ XML_SCHEMAP_SRC_ATTRIBUTE_GROUP_2 = 3072 : 3072
+ XML_SCHEMAP_SRC_ATTRIBUTE_GROUP_3 = 3073 : 3073
+ XML_SCHEMAP_MG_PROPS_CORRECT_1 = 3074 : 3074
+ XML_SCHEMAP_MG_PROPS_CORRECT_2 = 3075 : 3075
+ XML_SCHEMAP_SRC_CT_1 = 3076 : 3076
+ XML_SCHEMAP_DERIVATION_OK_RESTRICTION_2_1_3 = 3077 : 3077
+ XML_SCHEMAP_AU_PROPS_CORRECT_2 = 3078 : 3078
+ XML_SCHEMAP_A_PROPS_CORRECT_2 = 3079 : 3079
+ XML_SCHEMAP_C_PROPS_CORRECT = 3080 : 3080
+ XML_SCHEMAP_SRC_REDEFINE = 3081 : 3081
+ XML_SCHEMAP_SRC_IMPORT = 3082 : 3082
+ XML_SCHEMAP_WARN_SKIP_SCHEMA = 3083 : 3083
+ XML_SCHEMAP_WARN_UNLOCATED_SCHEMA = 3084 : 3084
+ XML_SCHEMAP_WARN_ATTR_REDECL_PROH = 3085 : 3085
+ XML_SCHEMAP_WARN_ATTR_POINTLESS_PROH = 3086 : 3085
+ XML_SCHEMAP_AG_PROPS_CORRECT = 3087 : 3086
+ XML_SCHEMAP_COS_CT_EXTENDS_1_2 = 3088 : 3087
+ XML_SCHEMAP_AU_PROPS_CORRECT = 3089 : 3088
+ XML_SCHEMAP_A_PROPS_CORRECT_3 = 3090 : 3089
+ XML_SCHEMAP_COS_ALL_LIMITED = 3091 : 3090
+ XML_MODULE_OPEN = 4900 : 4900
+ XML_MODULE_CLOSE = 4901 : 4901
+ XML_CHECK_FOUND_ELEMENT = 5000
+ XML_CHECK_FOUND_ATTRIBUTE = 5001 : 5001
+ XML_CHECK_FOUND_TEXT = 5002 : 5002
+ XML_CHECK_FOUND_CDATA = 5003 : 5003
+ XML_CHECK_FOUND_ENTITYREF = 5004 : 5004
+ XML_CHECK_FOUND_ENTITY = 5005 : 5005
+ XML_CHECK_FOUND_PI = 5006 : 5006
+ XML_CHECK_FOUND_COMMENT = 5007 : 5007
+ XML_CHECK_FOUND_DOCTYPE = 5008 : 5008
+ XML_CHECK_FOUND_FRAGMENT = 5009 : 5009
+ XML_CHECK_FOUND_NOTATION = 5010 : 5010
+ XML_CHECK_UNKNOWN_NODE = 5011 : 5011
+ XML_CHECK_ENTITY_TYPE = 5012 : 5012
+ XML_CHECK_NO_PARENT = 5013 : 5013
+ XML_CHECK_NO_DOC = 5014 : 5014
+ XML_CHECK_NO_NAME = 5015 : 5015
+ XML_CHECK_NO_ELEM = 5016 : 5016
+ XML_CHECK_WRONG_DOC = 5017 : 5017
+ XML_CHECK_NO_PREV = 5018 : 5018
+ XML_CHECK_WRONG_PREV = 5019 : 5019
+ XML_CHECK_NO_NEXT = 5020 : 5020
+ XML_CHECK_WRONG_NEXT = 5021 : 5021
+ XML_CHECK_NOT_DTD = 5022 : 5022
+ XML_CHECK_NOT_ATTR = 5023 : 5023
+ XML_CHECK_NOT_ATTR_DECL = 5024 : 5024
+ XML_CHECK_NOT_ELEM_DECL = 5025 : 5025
+ XML_CHECK_NOT_ENTITY_DECL = 5026 : 5026
+ XML_CHECK_NOT_NS_DECL = 5027 : 5027
+ XML_CHECK_NO_HREF = 5028 : 5028
+ XML_CHECK_WRONG_PARENT = 5029 : 5029
+ XML_CHECK_NS_SCOPE = 5030 : 5030
+ XML_CHECK_NS_ANCESTOR = 5031 : 5031
+ XML_CHECK_NOT_UTF8 = 5032 : 5032
+ XML_CHECK_NO_DICT = 5033 : 5033
+ XML_CHECK_NOT_NCNAME = 5034 : 5034
+ XML_CHECK_OUTSIDE_DICT = 5035 : 5035
+ XML_CHECK_WRONG_NAME = 5036 : 5036
+ XML_CHECK_NAME_NOT_NULL = 5037 : 5037
+ XML_I18N_NO_NAME = 6000
+ XML_I18N_NO_HANDLER = 6001 : 6001
+ XML_I18N_EXCESS_HANDLER = 6002 : 6002
+ XML_I18N_CONV_FAILED = 6003 : 6003
+ XML_I18N_NO_OUTPUT = 6004 : 6004
+ XML_CHECK_ = 6005 : 5033
+ XML_CHECK_X = 6006 : 503
+"""
+
+__initErrorConstants()
From scoder at codespeak.net Mon May 29 10:49:13 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Mon, 29 May 2006 10:49:13 +0200 (CEST)
Subject: [Lxml-checkins] r27825 - lxml/trunk
Message-ID: <20060529084913.9A32A10053@code0.codespeak.net>
Author: scoder
Date: Mon May 29 10:49:12 2006
New Revision: 27825
Modified:
lxml/trunk/CREDITS.txt
Log:
credits for noah
Modified: lxml/trunk/CREDITS.txt
==============================================================================
--- lxml/trunk/CREDITS.txt (original)
+++ lxml/trunk/CREDITS.txt Mon May 29 10:49:12 2006
@@ -32,6 +32,8 @@
David Sankel - building statically on Windows
+Noah Slater - lots of bug squeezing
+
Duncan Booth - bugfixing
Dean Pavlekovic - bug reporting
From scoder at codespeak.net Mon May 29 11:26:04 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Mon, 29 May 2006 11:26:04 +0200 (CEST)
Subject: [Lxml-checkins] r27827 - in lxml/trunk: . src/lxml src/lxml/tests
Message-ID: <20060529092604.3B11910057@code0.codespeak.net>
Author: scoder
Date: Mon May 29 11:25:59 2006
New Revision: 27827
Modified:
lxml/trunk/CHANGES.txt
lxml/trunk/src/lxml/c14n.pxd
lxml/trunk/src/lxml/etree.pyx
lxml/trunk/src/lxml/parser.pxi
lxml/trunk/src/lxml/serializer.pxi
lxml/trunk/src/lxml/tests/test_elementtree.py
Log:
fixed resetting element namespace, rewrite of C14N handling to use file-like objects and provide error messages
Modified: lxml/trunk/CHANGES.txt
==============================================================================
--- lxml/trunk/CHANGES.txt (original)
+++ lxml/trunk/CHANGES.txt Mon May 29 11:25:59 2006
@@ -7,6 +7,8 @@
Features added
--------------
+* Writing C14N no longer serializes in memory (reduced memory footprint)
+
* PyErrorLog for error logging through the Python ``logging`` module
* ``element.getroottree()`` returns an ElementTree for the root node of the
@@ -22,6 +24,9 @@
Bugs fixed
----------
+* Setting namespace-less tag names on namespaced elements ('{ns}t' -> 't')
+ didn't reset the namespace
+
* Unknown constants from newer libxml2 versions could raise exceptions in the
error handlers
Modified: lxml/trunk/src/lxml/c14n.pxd
==============================================================================
--- lxml/trunk/src/lxml/c14n.pxd (original)
+++ lxml/trunk/src/lxml/c14n.pxd Mon May 29 11:25:59 2006
@@ -1,4 +1,4 @@
-from tree cimport xmlDoc
+from tree cimport xmlDoc, xmlOutputBuffer
from xpath cimport xmlNodeSet
cdef extern from "libxml/c14n.h":
@@ -8,4 +8,19 @@
char** inclusive_ns_prefixes,
int with_comments,
char** doc_txt_ptr)
+
+ cdef int xmlC14NDocSave(xmlDoc* doc,
+ xmlNodeSet* nodes,
+ int exclusive,
+ char** inclusive_ns_prefixes,
+ int with_comments,
+ char* filename,
+ int compression)
+
+ cdef int xmlC14NDocSaveTo(xmlDoc* doc,
+ xmlNodeSet* nodes,
+ int exclusive,
+ char** inclusive_ns_prefixes,
+ int with_comments,
+ xmlOutputBuffer* buffer)
Modified: lxml/trunk/src/lxml/etree.pyx
==============================================================================
--- lxml/trunk/src/lxml/etree.pyx (original)
+++ lxml/trunk/src/lxml/etree.pyx Mon May 29 11:25:59 2006
@@ -244,6 +244,8 @@
object node_ns_utf, object nsmap):
"""Lookup current namespace prefixes, then set namespace structure for
node and register new ns-prefix mappings.
+
+ This only works for a newly created node!
"""
cdef xmlNs* c_ns
cdef xmlDoc* c_doc
@@ -251,7 +253,7 @@
cdef char* c_href
if not nsmap:
if node_ns_utf is not None:
- self._setNodeNs(c_node, node_ns_utf)
+ self._setNodeNs(c_node, _cstr(node_ns_utf))
return
c_doc = self._c_doc
@@ -272,7 +274,7 @@
node_ns_utf = None
if node_ns_utf is not None:
- self._setNodeNs(c_node, node_ns_utf)
+ self._setNodeNs(c_node, _cstr(node_ns_utf))
cdef _Document _documentFactory(xmlDoc* c_doc, _BaseParser parser):
cdef _Document result
@@ -510,7 +512,7 @@
self._assertHasRoot()
schema = XMLSchema(xmlschema)
return schema.validate(self)
-
+
def xinclude(self):
"""Process this document, including using XInclude.
"""
@@ -525,30 +527,13 @@
result = xinclude.xmlXIncludeProcessTree(self._context_node._c_node)
if result == -1:
raise XIncludeError, "XInclude processing failed"
-
+
def write_c14n(self, file):
"""C14N write of document. Always writes UTF-8.
"""
- cdef xmlDoc* c_base_doc
- cdef xmlDoc* c_doc
- cdef char* data
- cdef int bytes
self._assertHasRoot()
- c_base_doc = self._context_node._doc._c_doc
+ _tofilelikeC14N(file, self._context_node)
- c_doc = _fakeRootDoc(c_base_doc, self._context_node._c_node)
- bytes = c14n.xmlC14NDocDumpMemory(c_doc, NULL, 0, NULL, 1, &data)
- _destroyFakeDoc(c_base_doc, c_doc)
-
- if bytes < 0:
- raise C14NError, "C14N failed"
- try:
- if not hasattr(file, 'write'):
- file = open(file, 'wb')
- file.write(data)
- finally:
- tree.xmlFree(data)
-
cdef _ElementTree _elementTreeFactory(_Document doc,
_NodeBase context_node):
return _newElementTree(doc, context_node, _ElementTree)
@@ -707,13 +692,13 @@
return self._tag
def __set__(self, value):
- cdef xmlNs* c_ns
ns, text = _getNsTag(value)
self._tag = value
tree.xmlNodeSetName(self._c_node, _cstr(text))
if ns is None:
- return
- self._doc._setNodeNs(self._c_node, _cstr(ns))
+ self._c_node.ns = NULL
+ else:
+ self._doc._setNodeNs(self._c_node, _cstr(ns))
# not in ElementTree, read-only
property prefix:
@@ -982,7 +967,6 @@
else:
assert 0, "Unknown node type: %s" % c_node.type
result = element_class()
- result._tag = None
result._doc = doc
result._c_node = c_node
result._proxy_type = PROXY_ELEMENT
Modified: lxml/trunk/src/lxml/parser.pxi
==============================================================================
--- lxml/trunk/src/lxml/parser.pxi (original)
+++ lxml/trunk/src/lxml/parser.pxi Mon May 29 11:25:59 2006
@@ -4,6 +4,9 @@
cimport htmlparser
from xmlparser cimport xmlParserCtxt, xmlDict
+# initialize parser (and threading)
+xmlparser.xmlInitParser()
+
class XMLSyntaxError(LxmlSyntaxError):
pass
@@ -28,11 +31,6 @@
if self._c_dict is not NULL:
xmlparser.xmlDictFree(self._c_dict)
- cdef void _initParser(self):
- if not self._initialized:
- xmlparser.xmlInitParser()
- self._initialized = 1
-
cdef void _initParserDict(self, xmlParserCtxt* pctxt):
"Assure we always use the same string dictionary."
if self._c_dict is NULL or self._c_dict is pctxt.dict:
@@ -596,7 +594,6 @@
cdef Py_ssize_t c_len
if parser is None:
parser = __DEFAULT_PARSER
- __GLOBAL_PARSER_CONTEXT._initParser()
if not filename:
c_filename = NULL
else:
@@ -611,7 +608,6 @@
cdef xmlDoc* _parseDocFromFile(filename, _BaseParser parser) except NULL:
if parser is None:
parser = __DEFAULT_PARSER
- __GLOBAL_PARSER_CONTEXT._initParser()
return (<_BaseParser>parser)._parseDocFromFile(_cstr(filename))
cdef xmlDoc* _parseDocFromFilelike(source, filename,
@@ -619,7 +615,6 @@
cdef char* c_filename
if parser is None:
parser = __DEFAULT_PARSER
- __GLOBAL_PARSER_CONTEXT._initParser()
if not filename:
c_filename = NULL
else:
Modified: lxml/trunk/src/lxml/serializer.pxi
==============================================================================
--- lxml/trunk/src/lxml/serializer.pxi (original)
+++ lxml/trunk/src/lxml/serializer.pxi Mon May 29 11:25:59 2006
@@ -100,12 +100,14 @@
cdef class _FilelikeWriter:
cdef object _filelike
cdef _ExceptionContext _exc_context
+ cdef _ErrorLog error_log
def __init__(self, filelike, exc_context=None):
self._filelike = filelike
if exc_context is None:
self._exc_context = _ExceptionContext()
else:
self._exc_context = exc_context
+ self.error_log = _ErrorLog()
cdef tree.xmlOutputBuffer* _createOutputBuffer(
self, tree.xmlCharEncodingHandler* enchandler) except NULL:
@@ -173,6 +175,42 @@
if writer is not None:
writer._exc_context._raise_if_stored()
+cdef _tofilelikeC14N(f, _NodeBase element):
+ cdef _FilelikeWriter writer
+ cdef tree.xmlOutputBuffer* c_buffer
+ cdef xmlDoc* c_base_doc
+ cdef xmlDoc* c_doc
+ cdef int bytes
+
+ c_base_doc = element._c_node.doc
+ c_doc = _fakeRootDoc(c_base_doc, element._c_node)
+ try:
+ if python.PyString_Check(f) or python.PyUnicode_Check(f):
+ filename = _utf8(f)
+ bytes = c14n.xmlC14NDocSave(c_doc, NULL, 0, NULL, 1,
+ _cstr(filename), 0)
+ elif hasattr(f, 'write'):
+ writer = _FilelikeWriter(f)
+ c_buffer = writer._createOutputBuffer(NULL)
+ writer.error_log.connect()
+ bytes = c14n.xmlC14NDocSaveTo(c_doc, NULL, 0, NULL, 1, c_buffer)
+ writer.error_log.disconnect()
+ tree.xmlOutputBufferClose(c_buffer)
+ else:
+ raise TypeError, "File or filename expected, got '%s'" % type(f)
+ finally:
+ _destroyFakeDoc(c_base_doc, c_doc)
+
+ if writer is not None:
+ writer._exc_context._raise_if_stored()
+
+ if bytes < 0:
+ if writer is not None and len(writer.error_log):
+ message = writer.error_log[0].message
+ else:
+ message = "C14N failed"
+ raise C14NError, message
+
# dump node to file (mainly for debug)
cdef _dumpToFile(f, xmlNode* c_node, int pretty_print):
Modified: lxml/trunk/src/lxml/tests/test_elementtree.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_elementtree.py (original)
+++ lxml/trunk/src/lxml/tests/test_elementtree.py Mon May 29 11:25:59 2006
@@ -823,7 +823,7 @@
self.assertXML(
' C2 ',
a)
-
+
def test_tag_write(self):
Element = self.etree.Element
SubElement = self.etree.SubElement
@@ -841,6 +841,43 @@
' ',
a)
+ def test_tag_reset_ns(self):
+ Element = self.etree.Element
+ SubElement = self.etree.SubElement
+ tostring = self.etree.tostring
+
+ a = Element('{a}a')
+ b1 = SubElement(a, '{a}b')
+ b2 = SubElement(a, '{b}b')
+
+ self.assertEquals('{a}b', b1.tag)
+
+ b1.tag = 'c'
+
+ # can't use C14N here!
+ self.assertEquals('c', b1.tag)
+ self.assertEquals('
Author: scoder
Date: Mon May 29 11:44:16 2006
New Revision: 27830
Modified:
lxml/trunk/doc/FAQ.txt
lxml/trunk/src/lxml/parser.pxi
Log:
cleanup, extended FAQ section on multi-threading
Modified: lxml/trunk/doc/FAQ.txt
==============================================================================
--- lxml/trunk/doc/FAQ.txt (original)
+++ lxml/trunk/doc/FAQ.txt Mon May 29 11:44:16 2006
@@ -64,21 +64,26 @@
#) Can I use threads to concurrently access the lxml API?
You should be able to use lxml in a multi-threaded environment, although
- this is not very well tested. Note that lxml does not provide any
- thread-safety by itself (mainly for performance reasons), so you have to
- take care when you use parts of the API concurrently. Most importantly,
- you must not forget to call ``etree.initThread()`` from each newly
- generated thread to initialize lxml and libxml2 for the new thread context.
- If you call API functions from a thread without having called this function
- first, lxml can easily crash your program.
+ support is limited and not very well tested. Note that lxml does not
+ provide any thread-safety by itself (mainly for performance reasons), so
+ you have to take care when you use parts of the API concurrently. Most
+ importantly, you must not forget to call ``etree.initThread()`` from each
+ newly generated thread to initialize lxml and libxml2 for the new thread
+ context. If you call API functions from a thread without having called
+ this function first, lxml can easily crash your program.
+
+ Tree modification is not thread-safe, so you must take care to properly
+ serialize modifications. Reading from a tree concurrently should not
+ produce any problems (otherwise it is a bug).
Basically none of the API classes is thread-safe, including parsers, XPath,
- XSLT and the validators. You cannot use such an object concurrently.
- However, it is perfectly viable to create independent instances for each
- thread. This is a cheap thing to do for parsers, but more expensive for
- XSLT and validators, which have to compile trees recursively. So you might
- want to consider a thread pool approach or threaded processing chains to
- reduce the overhead if you require threading here.
+ XSLT and the validators. Each of them represents a stateful object that
+ cannot be used concurrently. However, it is perfectly viable to create
+ independent instances for each thread. This is a cheap thing to do for
+ parsers, but more expensive for XSLT and validators, which have to compile
+ trees recursively. So you might want to consider a thread pool approach or
+ threaded processing chains to reduce the overhead if you require threading
+ here.
#) What are the ``findall()`` and ``xpath()`` methods on Element(Tree)?
Modified: lxml/trunk/src/lxml/parser.pxi
==============================================================================
--- lxml/trunk/src/lxml/parser.pxi (original)
+++ lxml/trunk/src/lxml/parser.pxi Mon May 29 11:44:16 2006
@@ -21,11 +21,8 @@
"""Global parser context to share the string dictionary.
"""
cdef xmlDict* _c_dict
- cdef int _initialized
-
def __init__(self):
self._c_dict = NULL
- self._initialized = 0
def __dealloc__(self):
if self._c_dict is not NULL:
From scoder at codespeak.net Mon May 29 12:29:08 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Mon, 29 May 2006 12:29:08 +0200 (CEST)
Subject: [Lxml-checkins] r27835 - lxml/trunk/doc
Message-ID: <20060529102908.456131006D@code0.codespeak.net>
Author: scoder
Date: Mon May 29 12:29:06 2006
New Revision: 27835
Modified:
lxml/trunk/doc/FAQ.txt
Log:
FAQ entry on pretty printing, more on threading
Modified: lxml/trunk/doc/FAQ.txt
==============================================================================
--- lxml/trunk/doc/FAQ.txt (original)
+++ lxml/trunk/doc/FAQ.txt Mon May 29 12:29:06 2006
@@ -64,17 +64,18 @@
#) Can I use threads to concurrently access the lxml API?
You should be able to use lxml in a multi-threaded environment, although
- support is limited and not very well tested. Note that lxml does not
- provide any thread-safety by itself (mainly for performance reasons), so
- you have to take care when you use parts of the API concurrently. Most
- importantly, you must not forget to call ``etree.initThread()`` from each
- newly generated thread to initialize lxml and libxml2 for the new thread
- context. If you call API functions from a thread without having called
- this function first, lxml can easily crash your program.
+ this is not very well tested. For performance reasons, lxml.etree provides
+ only very limited thread-safety by itself, so you have to take care when
+ you use parts of the API concurrently. Most importantly, you must not
+ forget to call ``etree.initThread()`` from each newly generated thread to
+ initialize lxml and libxml2 for the new thread context. If you call API
+ functions from a thread without having called this function first, lxml can
+ behave unexpectedly and even crash your program. This is not considered a
+ bug in lxml, it is a bug in your code.
Tree modification is not thread-safe, so you must take care to properly
- serialize modifications. Reading from a tree concurrently should not
- produce any problems (otherwise it is a bug).
+ serialize modifications. Reading and traversing a tree concurrently should
+ not produce any problems (otherwise it is a bug).
Basically none of the API classes is thread-safe, including parsers, XPath,
XSLT and the validators. Each of them represents a stateful object that
@@ -85,6 +86,32 @@
threaded processing chains to reduce the overhead if you require threading
here.
+ This said, if you have problems with thread support or ideas how to improve
+ it, we would like to hear about it through the mailing list.
+
+
+#) Why doesn't the ``pretty_print`` option reformat my XML output?
+
+ Pretty printing (or formatting) an XML document means adding white space to
+ the content. These modifications are harmless if they only impact elements
+ in the document that do not carry (text) data. They corrupt your data if
+ they impact elements that contain data. The only way to distinguish
+ between harmless and harmful modification is structural information about
+ the document.
+
+ If lxml cannot distinguish between whitespace and data, it will not alter
+ your data. The best way to tell lxml where whitespace can be safely added
+ and removed is allowing the parser to load the DTD (which obviously
+ requires the DTD to be accessible)::
+
+ >>> tree = etree.parse(file, etree.XMLParser(load_dtd=True))
+
+ This will allow the parser to drop so-called 'ignorable whitespace' that is
+ not considered data (i.e. not part of the XML infoset). If you now call a
+ serialization function to pretty print this tree, it will use the
+ structural information it has to determine the correct places where it can
+ add whitespace to the XML tree.
+
#) What are the ``findall()`` and ``xpath()`` methods on Element(Tree)?
From scoder at codespeak.net Mon May 29 15:50:48 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Mon, 29 May 2006 15:50:48 +0200 (CEST)
Subject: [Lxml-checkins] r27854 - lxml/trunk/doc
Message-ID: <20060529135048.A5D2C10036@code0.codespeak.net>
Author: scoder
Date: Mon May 29 15:50:46 2006
New Revision: 27854
Modified:
lxml/trunk/doc/FAQ.txt
Log:
rewrite of FAQ threading section to say: doesn't work
Modified: lxml/trunk/doc/FAQ.txt
==============================================================================
--- lxml/trunk/doc/FAQ.txt (original)
+++ lxml/trunk/doc/FAQ.txt Mon May 29 15:50:46 2006
@@ -63,31 +63,13 @@
#) Can I use threads to concurrently access the lxml API?
- You should be able to use lxml in a multi-threaded environment, although
- this is not very well tested. For performance reasons, lxml.etree provides
- only very limited thread-safety by itself, so you have to take care when
- you use parts of the API concurrently. Most importantly, you must not
- forget to call ``etree.initThread()`` from each newly generated thread to
- initialize lxml and libxml2 for the new thread context. If you call API
- functions from a thread without having called this function first, lxml can
- behave unexpectedly and even crash your program. This is not considered a
- bug in lxml, it is a bug in your code.
-
- Tree modification is not thread-safe, so you must take care to properly
- serialize modifications. Reading and traversing a tree concurrently should
- not produce any problems (otherwise it is a bug).
-
- Basically none of the API classes is thread-safe, including parsers, XPath,
- XSLT and the validators. Each of them represents a stateful object that
- cannot be used concurrently. However, it is perfectly viable to create
- independent instances for each thread. This is a cheap thing to do for
- parsers, but more expensive for XSLT and validators, which have to compile
- trees recursively. So you might want to consider a thread pool approach or
- threaded processing chains to reduce the overhead if you require threading
- here.
+ Short answer: No.
- This said, if you have problems with thread support or ideas how to improve
- it, we would like to hear about it through the mailing list.
+ Long answer: lxml does not currently release the GIL (Python's global
+ interpreter lock) internally, so you will not benefit from any performance
+ improvements by using threads. It is also not trivial to free the GIL, as
+ lxml calls back into Python in many places during XML processing: extension
+ functions, Python resolvers, error reporting, etc.
#) Why doesn't the ``pretty_print`` option reformat my XML output?
From scoder at codespeak.net Mon May 29 16:33:40 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Mon, 29 May 2006 16:33:40 +0200 (CEST)
Subject: [Lxml-checkins] r27857 - lxml/trunk/doc
Message-ID: <20060529143340.18DB010053@code0.codespeak.net>
Author: scoder
Date: Mon May 29 16:33:38 2006
New Revision: 27857
Modified:
lxml/trunk/doc/FAQ.txt
Log:
rewrote pretty print section in FAQ
Modified: lxml/trunk/doc/FAQ.txt
==============================================================================
--- lxml/trunk/doc/FAQ.txt (original)
+++ lxml/trunk/doc/FAQ.txt Mon May 29 16:33:38 2006
@@ -77,22 +77,19 @@
Pretty printing (or formatting) an XML document means adding white space to
the content. These modifications are harmless if they only impact elements
in the document that do not carry (text) data. They corrupt your data if
- they impact elements that contain data. The only way to distinguish
- between harmless and harmful modification is structural information about
- the document.
-
- If lxml cannot distinguish between whitespace and data, it will not alter
- your data. The best way to tell lxml where whitespace can be safely added
- and removed is allowing the parser to load the DTD (which obviously
- requires the DTD to be accessible)::
-
- >>> tree = etree.parse(file, etree.XMLParser(load_dtd=True))
-
- This will allow the parser to drop so-called 'ignorable whitespace' that is
- not considered data (i.e. not part of the XML infoset). If you now call a
- serialization function to pretty print this tree, it will use the
- structural information it has to determine the correct places where it can
- add whitespace to the XML tree.
+ they impact elements that contain data. If lxml cannot distinguish between
+ whitespace and data, it will not alter your data. Whitespace is therefore
+ only added between nodes that do not contain data. This is always the case
+ for trees constructed element-by-element, so no problems should be expected
+ here. For parsed trees, a good way to assure that no conflicting
+ whitespace is left in the tree is the ``ignore_blanks`` option::
+
+ >>> parser = etree.XMLParser(ignore_blanks=True)
+ >>> tree = etree.parse(file, parser)
+
+ This will allow the parser to drop blank text nodes when constructing the
+ tree. If you now call a serialization function to pretty print this tree,
+ lxml can add fresh whitespace to the XML tree to indent it.
#) What are the ``findall()`` and ``xpath()`` methods on Element(Tree)?
From scoder at codespeak.net Mon May 29 16:39:54 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Mon, 29 May 2006 16:39:54 +0200 (CEST)
Subject: [Lxml-checkins] r27858 - lxml/trunk/src/lxml
Message-ID: <20060529143954.49CFE10053@code0.codespeak.net>
Author: scoder
Date: Mon May 29 16:39:50 2006
New Revision: 27858
Modified:
lxml/trunk/src/lxml/etree.pyx
lxml/trunk/src/lxml/parser.pxi
lxml/trunk/src/lxml/tree.pxd
lxml/trunk/src/lxml/xmlerror.pxd
lxml/trunk/src/lxml/xmlerror.pxi
Log:
restructuring in thread setup, use thread default values for configuration, no longer uses KeepBlanksDefault (more XML compliant)
Modified: lxml/trunk/src/lxml/etree.pyx
==============================================================================
--- lxml/trunk/src/lxml/etree.pyx (original)
+++ lxml/trunk/src/lxml/etree.pyx Mon May 29 16:39:50 2006
@@ -40,10 +40,21 @@
# make the compiled-in debug state publicly available
DEBUG = __DEBUG
+# global per-thread setup
+tree.xmlThrDefIndentTreeOutput(1)
+tree.xmlThrDefLineNumbersDefaultValue(1)
+
+_initThreadLogging()
+
+# initialize parser (and threading)
+xmlparser.xmlInitParser()
+
def initThread():
- "Call this method to set up the library from within a new thread."
- _initThreadLogging()
- tree.xmlKeepBlanksDefault(0)
+ """Must be called by each newly created thread before calling any API
+ functions."""
+ #_initThreadLogging()
+ pass
+
# Error superclass for ElementTree compatibility
class Error(Exception):
@@ -1258,6 +1269,7 @@
cdef void _prepareNextNode(self):
cdef _NodeBase node
cdef xmlNode* c_node
+ cdef xmlNode* c_next_node
cdef xmlNode* c_parent
# find in descendants
node = self._next_node
@@ -1287,11 +1299,12 @@
# we are at a sibling, so set c_parent to our parent
c_parent = c_parent.parent
- self._next_node = _elementFactory(node._doc, c_node)
+ c_next_node = c_node
# fix depth counter by looking up path to original parent
while c_node is not c_parent:
self._depth = self._depth + 1
c_node = c_node.parent
+ self._next_node = _elementFactory(node._doc, c_next_node)
cdef xmlNode* _createElement(xmlDoc* c_doc, object name_utf) except NULL:
cdef xmlNode* c_node
@@ -1523,6 +1536,3 @@
include "relaxng.pxi" # RelaxNG
include "xmlschema.pxi" # XMLSchema
-
-# configure main thread
-initThread()
Modified: lxml/trunk/src/lxml/parser.pxi
==============================================================================
--- lxml/trunk/src/lxml/parser.pxi (original)
+++ lxml/trunk/src/lxml/parser.pxi Mon May 29 16:39:50 2006
@@ -4,9 +4,6 @@
cimport htmlparser
from xmlparser cimport xmlParserCtxt, xmlDict
-# initialize parser (and threading)
-xmlparser.xmlInitParser()
-
class XMLSyntaxError(LxmlSyntaxError):
pass
@@ -449,13 +446,14 @@
* no_network - prevent network access
* ns_clean - clean up redundant namespace declarations
* recover - try hard to parse through broken XML
+ * ignore_blanks - discard blank text nodes
Note that you must not share parsers between threads. This applies also
to the default parser.
"""
def __init__(self, attribute_defaults=False, dtd_validation=False,
load_dtd=False, no_network=False, ns_clean=False,
- recover=False):
+ recover=False, ignore_blanks=False):
cdef int parse_options
_BaseParser.__init__(self)
@@ -474,6 +472,8 @@
parse_options = parse_options | xmlparser.XML_PARSE_NSCLEAN
if recover:
parse_options = parse_options | xmlparser.XML_PARSE_RECOVER
+ if ignore_blanks:
+ parse_options = parse_options | xmlparser.XML_PARSE_NOBLANKS
self._parse_options = parse_options
Modified: lxml/trunk/src/lxml/tree.pxd
==============================================================================
--- lxml/trunk/src/lxml/tree.pxd (original)
+++ lxml/trunk/src/lxml/tree.pxd Mon May 29 16:39:50 2006
@@ -228,6 +228,7 @@
FILE* file, xmlCharEncodingHandler* encoder)
cdef xmlOutputBuffer* xmlOutputBufferCreateFilename(
char* URI, xmlCharEncodingHandler* encoder, int compression)
+
cdef extern from "libxml/xmlsave.h":
ctypedef struct xmlSaveCtxt:
pass
@@ -236,6 +237,11 @@
int options)
cdef long xmlSaveDoc(xmlSaveCtxt* ctxt, xmlDoc* doc)
cdef int xmlSaveClose(xmlSaveCtxt* ctxt)
+
+cdef extern from "libxml/globals.h":
+ cdef int xmlThrDefKeepBlanksDefaultValue(int onoff)
+ cdef int xmlThrDefLineNumbersDefaultValue(int onoff)
+ cdef int xmlThrDefIndentTreeOutput(int onoff)
cdef extern from "libxml/xmlstring.h":
cdef char* xmlStrdup(char* cur)
Modified: lxml/trunk/src/lxml/xmlerror.pxd
==============================================================================
--- lxml/trunk/src/lxml/xmlerror.pxd (original)
+++ lxml/trunk/src/lxml/xmlerror.pxd Mon May 29 16:39:50 2006
@@ -14,10 +14,11 @@
char* file
int line
- cdef void xmlSetGenericErrorFunc(void* ctxt,
- void (*handler)(void* ctxt, char* msg, ...))
- cdef void xmlSetStructuredErrorFunc(void* ctxt,
- void (*handler)(void* userData, xmlError* error))
+ ctypedef void (*xmlGenericErrorFunc)(void* ctxt, char* msg, ...)
+ ctypedef void (*xmlStructuredErrorFunc)(void* userData, xmlError* error)
+
+ cdef void xmlSetGenericErrorFunc(void* ctxt, xmlGenericErrorFunc func)
+ cdef void xmlSetStructuredErrorFunc(void* ctxt, xmlStructuredErrorFunc func)
ctypedef enum xmlErrorDomain:
XML_FROM_NONE = 0
@@ -775,3 +776,9 @@
XML_I18N_NO_OUTPUT = 6004 # 6004
XML_CHECK_ = 6005 # 5033
XML_CHECK_X = 6006 # 503
+
+cdef extern from "libxml/globals.h":
+ cdef void xmlThrDefSetGenericErrorFunc(void* ctx,
+ xmlGenericErrorFunc handler)
+ cdef void xmlThrDefSetStructuredErrorFunc(void* ctx,
+ xmlStructuredErrorFunc handler)
Modified: lxml/trunk/src/lxml/xmlerror.pxi
==============================================================================
--- lxml/trunk/src/lxml/xmlerror.pxi (original)
+++ lxml/trunk/src/lxml/xmlerror.pxi Mon May 29 16:39:50 2006
@@ -14,7 +14,6 @@
cdef void _initThreadLogging():
"Setup logging for the current thread. Called from etree.initThread()."
# switch on line number reporting
- xmlparser.xmlLineNumbersDefault(1)
_logLibxmlErrors()
try:
_logLibxsltErrors()
@@ -372,7 +371,10 @@
# setup for global log:
cdef void _logLibxmlErrors():
+ xmlerror.xmlThrDefSetGenericErrorFunc(NULL, _nullGenericErrorFunc)
xmlerror.xmlSetGenericErrorFunc(NULL, _nullGenericErrorFunc)
+
+ xmlerror.xmlThrDefSetStructuredErrorFunc(NULL, _receiveError)
xmlerror.xmlSetStructuredErrorFunc(NULL, _receiveError)
################################################################################
From scoder at codespeak.net Mon May 29 16:56:02 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Mon, 29 May 2006 16:56:02 +0200 (CEST)
Subject: [Lxml-checkins] r27859 - in lxml/trunk: doc src/lxml
Message-ID: <20060529145602.3B59810057@code0.codespeak.net>
Author: scoder
Date: Mon May 29 16:56:00 2006
New Revision: 27859
Modified:
lxml/trunk/doc/FAQ.txt
lxml/trunk/src/lxml/parser.pxi
Log:
renamed ignore_blanks option in XMLParser as remove_blank_text, as it is called in HTMLParser
Modified: lxml/trunk/doc/FAQ.txt
==============================================================================
--- lxml/trunk/doc/FAQ.txt (original)
+++ lxml/trunk/doc/FAQ.txt Mon May 29 16:56:00 2006
@@ -82,9 +82,9 @@
only added between nodes that do not contain data. This is always the case
for trees constructed element-by-element, so no problems should be expected
here. For parsed trees, a good way to assure that no conflicting
- whitespace is left in the tree is the ``ignore_blanks`` option::
+ whitespace is left in the tree is the ``remove_blank_text`` option::
- >>> parser = etree.XMLParser(ignore_blanks=True)
+ >>> parser = etree.XMLParser(remove_blank_text=True)
>>> tree = etree.parse(file, parser)
This will allow the parser to drop blank text nodes when constructing the
Modified: lxml/trunk/src/lxml/parser.pxi
==============================================================================
--- lxml/trunk/src/lxml/parser.pxi (original)
+++ lxml/trunk/src/lxml/parser.pxi Mon May 29 16:56:00 2006
@@ -446,14 +446,14 @@
* no_network - prevent network access
* ns_clean - clean up redundant namespace declarations
* recover - try hard to parse through broken XML
- * ignore_blanks - discard blank text nodes
+ * remove_blank_text - discard blank text nodes
Note that you must not share parsers between threads. This applies also
to the default parser.
"""
def __init__(self, attribute_defaults=False, dtd_validation=False,
load_dtd=False, no_network=False, ns_clean=False,
- recover=False, ignore_blanks=False):
+ recover=False, remove_blank_text=False):
cdef int parse_options
_BaseParser.__init__(self)
From scoder at codespeak.net Mon May 29 16:56:40 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Mon, 29 May 2006 16:56:40 +0200 (CEST)
Subject: [Lxml-checkins] r27860 - lxml/trunk/src/lxml
Message-ID: <20060529145640.032CD10057@code0.codespeak.net>
Author: scoder
Date: Mon May 29 16:56:39 2006
New Revision: 27860
Modified:
lxml/trunk/src/lxml/parser.pxi
Log:
doc update
Modified: lxml/trunk/src/lxml/parser.pxi
==============================================================================
--- lxml/trunk/src/lxml/parser.pxi (original)
+++ lxml/trunk/src/lxml/parser.pxi Mon May 29 16:56:39 2006
@@ -558,7 +558,7 @@
Available keyword arguments:
* recover - try hard to parse through broken HTML (default: True)
* no_network - prevent network access
- * remove_blank_text - clean up empty text nodes
+ * remove_blank_text - discard empty text nodes
Note that you must not share parsers between threads.
"""
From scoder at codespeak.net Mon May 29 17:42:04 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Mon, 29 May 2006 17:42:04 +0200 (CEST)
Subject: [Lxml-checkins] r27861 - lxml/trunk/src/lxml
Message-ID: <20060529154204.44CB210041@code0.codespeak.net>
Author: scoder
Date: Mon May 29 17:42:02 2006
New Revision: 27861
Modified:
lxml/trunk/src/lxml/parser.pxi
Log:
fix reference to previously renamed variable
Modified: lxml/trunk/src/lxml/parser.pxi
==============================================================================
--- lxml/trunk/src/lxml/parser.pxi (original)
+++ lxml/trunk/src/lxml/parser.pxi Mon May 29 17:42:02 2006
@@ -472,7 +472,7 @@
parse_options = parse_options | xmlparser.XML_PARSE_NSCLEAN
if recover:
parse_options = parse_options | xmlparser.XML_PARSE_RECOVER
- if ignore_blanks:
+ if remove_blank_text:
parse_options = parse_options | xmlparser.XML_PARSE_NOBLANKS
self._parse_options = parse_options
From scoder at codespeak.net Mon May 29 18:11:05 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Mon, 29 May 2006 18:11:05 +0200 (CEST)
Subject: [Lxml-checkins] r27863 - lxml/trunk/src/lxml/tests
Message-ID: <20060529161105.BC0BE10053@code0.codespeak.net>
Author: scoder
Date: Mon May 29 18:11:04 2006
New Revision: 27863
Modified:
lxml/trunk/src/lxml/tests/test_elementtree.py
Log:
added test case by Noah: appending element with xml:id attribute fails (in both etree and ET)
Modified: lxml/trunk/src/lxml/tests/test_elementtree.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_elementtree.py (original)
+++ lxml/trunk/src/lxml/tests/test_elementtree.py Mon May 29 18:11:04 2006
@@ -1663,7 +1663,27 @@
# as namespace is not moved along with it
del one
self.assertEquals('{http://a.b.c}baz', baz.tag)
-
+
+ def test_attribute_xmlns_move(self):
+ Element = self.etree.Element
+
+ root = etree.Element('element')
+
+ subelement = etree.Element('subelement')
+ subelement.set("{http://www.w3.org/XML/1998/namespace}id", "foo")
+ self.assertEqual(1, len(subelement.attrib))
+ self.assertEquals(
+ "foo",
+ subelement.get("{http://www.w3.org/XML/1998/namespace}id"))
+
+ root.append(subelement)
+ self.assertEqual(1, len(subelement.attrib))
+ self.assertEquals({"{http://www.w3.org/XML/1998/namespace}id" : "foo"},
+ subelement.attrib)
+ self.assertEquals(
+ "foo",
+ subelement.get("{http://www.w3.org/XML/1998/namespace}id"))
+
def test_tostring(self):
tostring = self.etree.tostring
Element = self.etree.Element
From faassen at codespeak.net Mon May 29 18:20:14 2006
From: faassen at codespeak.net (faassen at codespeak.net)
Date: Mon, 29 May 2006 18:20:14 +0200 (CEST)
Subject: [Lxml-checkins] r27865 - lxml/trunk
Message-ID: <20060529162014.2ED3E10053@code0.codespeak.net>
Author: faassen
Date: Mon May 29 18:20:13 2006
New Revision: 27865
Modified:
lxml/trunk/CREDITS.txt
Log:
update credits. :)
Modified: lxml/trunk/CREDITS.txt
==============================================================================
--- lxml/trunk/CREDITS.txt (original)
+++ lxml/trunk/CREDITS.txt Mon May 29 18:20:13 2006
@@ -1,10 +1,10 @@
Credits
-------
-Martijn Faassen - initial main developer
-
Stefan Behnel - main developer and maintainer
+Martijn Faassen - creator of lxml and initial main developer
+
Marc-Antoine Parent - XPath extension function help and patches
Olivier Grisel - improved (c)ElementTree compatibility patches,
From scoder at codespeak.net Tue May 30 06:56:43 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Tue, 30 May 2006 06:56:43 +0200 (CEST)
Subject: [Lxml-checkins] r27887 - lxml/trunk/src/lxml/tests
Message-ID: <20060530045643.CB17910063@code0.codespeak.net>
Author: scoder
Date: Tue May 30 06:56:39 2006
New Revision: 27887
Modified:
lxml/trunk/src/lxml/tests/test_elementtree.py
Log:
test case cleanup
Modified: lxml/trunk/src/lxml/tests/test_elementtree.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_elementtree.py (original)
+++ lxml/trunk/src/lxml/tests/test_elementtree.py Tue May 30 06:56:39 2006
@@ -1667,9 +1667,9 @@
def test_attribute_xmlns_move(self):
Element = self.etree.Element
- root = etree.Element('element')
+ root = Element('element')
- subelement = etree.Element('subelement')
+ subelement = Element('subelement')
subelement.set("{http://www.w3.org/XML/1998/namespace}id", "foo")
self.assertEqual(1, len(subelement.attrib))
self.assertEquals(
From scoder at codespeak.net Tue May 30 07:39:39 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Tue, 30 May 2006 07:39:39 +0200 (CEST)
Subject: [Lxml-checkins] r27888 - lxml/trunk/src/lxml
Message-ID: <20060530053939.816EF10053@code0.codespeak.net>
Author: scoder
Date: Tue May 30 07:39:37 2006
New Revision: 27888
Modified:
lxml/trunk/src/lxml/etree.pyx
Log:
whitespace
Modified: lxml/trunk/src/lxml/etree.pyx
==============================================================================
--- lxml/trunk/src/lxml/etree.pyx (original)
+++ lxml/trunk/src/lxml/etree.pyx Tue May 30 07:39:37 2006
@@ -633,7 +633,7 @@
def set(self, key, value):
_setAttributeValue(self, key, value)
-
+
def append(self, _Element element not None):
cdef xmlNode* c_next
cdef xmlNode* c_node
From scoder at codespeak.net Tue May 30 08:26:07 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Tue, 30 May 2006 08:26:07 +0200 (CEST)
Subject: [Lxml-checkins] r27889 - lxml/trunk/src/lxml
Message-ID: <20060530062607.B78DB1005A@code0.codespeak.net>
Author: scoder
Date: Tue May 30 08:26:05 2006
New Revision: 27889
Modified:
lxml/trunk/src/lxml/proxy.pxi
Log:
small performance improvement in deallocation code: faster handling of common case where elements are deallocated but not their parents (SubElement etc.)
Modified: lxml/trunk/src/lxml/proxy.pxi
==============================================================================
--- lxml/trunk/src/lxml/proxy.pxi (original)
+++ lxml/trunk/src/lxml/proxy.pxi Tue May 30 08:26:05 2006
@@ -151,21 +151,23 @@
cdef xmlNode* c_current
cdef xmlNode* c_top
#print "trying to do deallocating:", c_node.type
+ if c_node._private is not NULL:
+ #print "Not freeing: proxies still exist"
+ return NULL
c_current = c_node.parent
c_top = c_node
while c_current is not NULL:
#print "checking:", c_current.type
- # if we're still attached to the document, don't deallocate
if c_current.type == tree.XML_DOCUMENT_NODE or \
c_current.type == tree.XML_HTML_DOCUMENT_NODE:
#print "not freeing: still in doc"
return NULL
+ # if we're still attached to the document, don't deallocate
+ if c_current._private is not NULL:
+ #print "Not freeing: proxies still exist"
+ return NULL
c_top = c_current
c_current = c_current.parent
- # cannot free a top which has proxies pointing to it
- if c_top._private is not NULL:
- #print "Not freeing: proxies still exist"
- return NULL
# see whether we have children to deallocate
if canDeallocateChildren(c_top):
return c_top
From scoder at codespeak.net Tue May 30 08:33:54 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Tue, 30 May 2006 08:33:54 +0200 (CEST)
Subject: [Lxml-checkins] r27890 - in lxml/trunk: . doc
Message-ID: <20060530063354.1DEC41005A@code0.codespeak.net>
Author: scoder
Date: Tue May 30 08:33:52 2006
New Revision: 27890
Modified:
lxml/trunk/bench.py
lxml/trunk/doc/performance.txt
Log:
benchmark for creating Elements, compare to makeelement/SubElement
Modified: lxml/trunk/bench.py
==============================================================================
--- lxml/trunk/bench.py (original)
+++ lxml/trunk/bench.py Tue May 30 08:33:52 2006
@@ -347,6 +347,11 @@
for child in root:
child.makeelement('{test}test', empty_attrib)
+ def bench_create_elements(self, root):
+ Element = self.etree.Element
+ for child in root:
+ Element('{test}test')
+
def bench_replace_children_element(self, root):
Element = self.etree.Element
for child in root:
@@ -684,6 +689,9 @@
result = run_bench(bench, *benchmark_setup)
except SkippedTest:
print "skipped"
+ except KeyboardInterrupt:
+ print "interrupted by user"
+ sys.exit(1)
except Exception, e:
print "failed: %s: %s" % (e.__class__.__name__, e)
else:
Modified: lxml/trunk/doc/performance.txt
==============================================================================
--- lxml/trunk/doc/performance.txt (original)
+++ lxml/trunk/doc/performance.txt Tue May 30 08:33:52 2006
@@ -13,7 +13,8 @@
The statements made here are backed by the benchmark script `bench.py`_ that
comes with the lxml source distribution. The timings cited below compare lxml
-1.0, ElementTree 1.2.6 and cElementTree 1.0.5 under CPython 2.4.2.
+1.0, ElementTree 1.2.6 and cElementTree 1.0.5 under CPython 2.4.2 on an AMD64
+machine.
.. _`bench.py`: http://codespeak.net/svn/lxml/trunk/bench.py
@@ -40,10 +41,10 @@
Parsing and Serialising
-----------------------
-This is one of the areas where lxml excels. The reason is that both parts are
-executed entirely at the C level, without major interaction with Python code.
-The results are rather impressive. Compared to cElementTree, lxml is about 20
-to 40 times faster on serialisation::
+These are areas where lxml excels. The reason is that both parts are executed
+entirely at the C level, without major interaction with Python code. The
+results are rather impressive. Compared to cElementTree, lxml is about 20 to
+40 times faster on serialisation::
lxe: tostring_utf16 (SA T2) 30.9846 msec/pass
cET: tostring_utf16 (SA T2) 715.5002 msec/pass
@@ -110,12 +111,32 @@
three times faster than lxml here. One of the reasons is that lxml must
additionally discard the created Python elements after their use, when they
are no longer referenced. ET and cET represent the tree itself through these
-objects, which reduces their overhead in creating them.
+objects, which reduces the overhead in creating them.
+
+As opposed to ET, libxml2 has a notion of documents that each element must be
+in. This results in a major performance difference for creating independent
+Elements that end up in independently created documents::
+
+ lxe: create_elements (-- T2 ) 22.0083 msec/pass
+ cET: create_elements (-- T2 ) 0.3920 msec/pass
+ ET : create_elements (-- T2 ) 3.0865 msec/pass
+
+Therefore, it is always preferable to create Elements for the document they
+are supposed to end up in, either as SubElements of an Element or using the
+explicit ``Element.makeelement()`` call::
+
+ lxe: makeelement (-- T2 ) 4.3003 msec/pass
+ cET: makeelement (-- T2 ) 0.5520 msec/pass
+ ET : makeelement (-- T2 ) 3.8092 msec/pass
+
+ lxe: create_subelements (-- T2 ) 3.9673 msec/pass
+ cET: create_subelements (-- T2 ) 0.5666 msec/pass
+ ET : create_subelements (-- T2 ) 6.4613 msec/pass
So, if the main performance bottleneck of an application is creating large XML
trees in memory through calls to Element and SubElement, cET is the best
choice. Note, however, that the serialisation performance may even out this
-advantage.
+advantage, especially for smaller trees and trees with many attributes.
A critical action for lxml is moving elements between document contexts. It
requires lxml to do recursive adaptations throughout the moved tree structure.
@@ -170,7 +191,8 @@
Another area where lxml is very fast is iteration for tree traversal. If your
algorithms can benefit from step-by-step traversal of the XML tree and
-especially if few elements are of interest, lxml is a good choice::
+especially if few elements are of interest or the element tag name is known,
+lxml is a good choice::
lxe: getiterator_all (-- T2 ) 31.2719 msec/pass
cET: getiterator_all (-- T2 ) 36.3687 msec/pass
From scoder at codespeak.net Tue May 30 10:14:07 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Tue, 30 May 2006 10:14:07 +0200 (CEST)
Subject: [Lxml-checkins] r27892 - lxml/trunk/src/lxml
Message-ID: <20060530081407.9532010057@code0.codespeak.net>
Author: scoder
Date: Tue May 30 10:14:06 2006
New Revision: 27892
Modified:
lxml/trunk/src/lxml/etree.pyx
Log:
moved _init() method from _NodeBase down to _Element where it is actually used
Modified: lxml/trunk/src/lxml/etree.pyx
==============================================================================
--- lxml/trunk/src/lxml/etree.pyx (original)
+++ lxml/trunk/src/lxml/etree.pyx Tue May 30 10:14:06 2006
@@ -347,11 +347,6 @@
unregisterProxy(self)
attemptDeallocation(self._c_node)
- def _init(self):
- """Called after object initialisation. Subclasses may override
- this if they recursively call _init() in the superclasses.
- """
-
cdef class _ElementTree:
cdef _Document _doc
cdef _NodeBase _context_node
@@ -562,6 +557,11 @@
cdef class _Element(_NodeBase):
cdef object _tag
+ def _init(self):
+ """Called after object initialisation. Custom subclasses may override
+ this if they recursively call _init() in the superclasses.
+ """
+
# MANIPULATORS
def __setitem__(self, Py_ssize_t index, _NodeBase element not None):
From scoder at codespeak.net Tue May 30 10:14:19 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Tue, 30 May 2006 10:14:19 +0200 (CEST)
Subject: [Lxml-checkins] r27893 - lxml/trunk/src/lxml
Message-ID: <20060530081419.1B7A410057@code0.codespeak.net>
Author: scoder
Date: Tue May 30 10:14:17 2006
New Revision: 27893
Modified:
lxml/trunk/src/lxml/nsclasses.pxi
Log:
doc updates
Modified: lxml/trunk/src/lxml/nsclasses.pxi
==============================================================================
--- lxml/trunk/src/lxml/nsclasses.pxi (original)
+++ lxml/trunk/src/lxml/nsclasses.pxi Tue May 30 10:14:17 2006
@@ -5,10 +5,13 @@
cdef class ElementBase(_Element):
"""All classes in namespace implementations must inherit from this one.
+
Note that subclasses *must not* override __init__ or __new__ as it is
absolutely undefined when these objects will be created or destroyed. All
- persistent state of elements must be stored in the underlying XML."""
- pass
+ persistent state of elements must be stored in the underlying XML. If you
+ really need to initialize the object after creation, you can implement an
+ ``_init(self)`` method that will be called after object creation.
+ """
cdef object __NAMESPACE_REGISTRIES
__NAMESPACE_REGISTRIES = {}
From scoder at codespeak.net Tue May 30 10:45:37 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Tue, 30 May 2006 10:45:37 +0200 (CEST)
Subject: [Lxml-checkins] r27894 - lxml/trunk/doc
Message-ID: <20060530084537.648A010057@code0.codespeak.net>
Author: scoder
Date: Tue May 30 10:45:35 2006
New Revision: 27894
Modified:
lxml/trunk/doc/namespace_extensions.txt
Log:
major rewrite of documentation in doc/namespace_extensions.txt
Modified: lxml/trunk/doc/namespace_extensions.txt
==============================================================================
--- lxml/trunk/doc/namespace_extensions.txt (original)
+++ lxml/trunk/doc/namespace_extensions.txt Tue May 30 10:45:35 2006
@@ -2,118 +2,111 @@
Implementing namespaces with the Namespace class
================================================
-Also see `extensions`_.
+lxml allows you to implement namespaces, in a rather literal sense. You can
+write your own classes for Elements and have lxml use them for a specific tag
+name in a specific namespace.
-.. _`extensions`: extensions.html
+Custom Elements must inherit from the ``etree.ElementBase`` class, which
+provides the Element interface for subclasses::
-Imagine, you have a namespace called 'http://hui.de/honk' and have to
-treat all of its elements in a specific way, say, to find out if they
-are really honking. You could provide a function called 'is_honking'
-that handles that::
-
- >>> def is_honking(honk_element):
- ... return honk_element.get('honking') == 'true'
-
-Then you can use it::
+ >>> from lxml import etree
+ >>> class HonkElement(etree.ElementBase):
+ ... def honking(self):
+ ... return self.get('honking') == 'true'
+ ... honking = property(honking)
- >>> from lxml.etree import XML
- >>> honk_element = XML(' ')
- >>> print is_honking(honk_element)
- True
+This defines a new Element class ``HonkElement`` with a property ``honking``.
-Not too bad, right? Now, imagine, you only want to do that to certain
-elements from that namespace and prevent others from being passed to
-is_honking. You can add a check to is_honking to test the tag name
-before doing anything else.
-
-After a while, however, you remember what you heard at school about
-object oriented programming. You start wondering if there isn't a
-nicer way to do that. -- And there is!
+Note that you cannot (or rather must not) instantiate this class yourself.
+lxml.etree will do that for you through its normal ElementTree API. To let
+lxml know about it, you must register it with a namespace.
The Namespace class
-===================
-
-lxml allows you to implement namespaces, in a rather literal
-sense. You can do the above like this::
-
- >>> from lxml.etree import Namespace, ElementBase
- >>> class HonkElement(ElementBase):
- ... def honking(self):
- ... return self.get('honking') == 'true'
- ... honking = property(honking)
+-------------------
-Now you can build the new namespace by calling the Namespace class::
+You can build a new namespace (or retrieve an existing one) by calling the
+Namespace class::
- >>> namespace = Namespace('http://hui.de/honk')
+ >>> namespace = etree.Namespace('http://hui.de/honk')
-and then register the new element type with that namespace::
+and then register the new element type with that namespace, say, under the tag
+name ``honk``::
>>> namespace['honk'] = HonkElement
-After this, you create and use your XML elements::
+After this, you create and use your XML elements through the normal API of
+lxml::
- >>> honk_element = XML(' ')
+ >>> xml = '<honk xmlns="http://hui.de/honk" honking="true"/>'
+ >>> honk_element = etree.XML(xml)
>>> print honk_element.honking
True
The same works when creating elements by hand::
- >>> from lxml.etree import Element
- >>> honk_element = Element('{http://hui.de/honk}honk', honking='true')
+ >>> honk_element = etree.Element('{http://hui.de/honk}honk',
+ ... honking='true')
>>> print honk_element.honking
True
-Essentially, what this allows you to do, is giving elements a specific
-API based on their namespace and element name.
+Essentially, what this allows you to do, is to give elements a custom API
+based on their namespace and tag name.
+
+A somewhat related topic is `extension functions`_, which use a similar
+mechanism for registering extension functions in XPath and XSLT.
+
+.. _`extension functions`: extensions.html
Element initialization
----------------------
-There is one thing to remember. Element classes *must not* have a
-constructor, neither must there be any internal state (except for
-their XML representation). Element instances are created and garbage
+There is one thing to remember. Element classes *must not* have a
+constructor, neither must there be any internal state (except for the data
+stored in the underlying XML tree). Element instances are created and garbage
collected at need, so there is no way to predict when and how often a
-constructor would be called. Even worse, when the ``__init__`` method
-is called, the object may not even be initialized yet to represent the
-XML tag, so there is not much use in providing an ``__init__`` method
-in subclasses.
-
-However, there is one possible way to do things on element
-initialization. Element classes have an ``_init()`` method that can be
-overridden. It can be used to modify the XML tree, e.g. to construct
+constructor would be called. Even worse, when the ``__init__`` method is
+called, the object may not even be initialized yet to represent the XML tag,
+so there is not much use in providing an ``__init__`` method in subclasses.
+
+However, there is one possible way to do things on element initialization, if
+you really need to. ElementBase classes have an ``_init()`` method that can
+be overridden. It can be used to modify the XML tree, e.g. to construct
special children or verify and update attributes.
The semantics of ``_init()`` are as follows:
-* It is called at least once on element instantiation time. That is,
- when a Python representation of the element is created. At that
- time, the element object is completely initialized to represent a
- specific XML element within the tree.
-
-* The method has complete access to the XML structure. Modifications
- can be done in exactly the same way as anywhere else in the program.
-
-* It may be called multiple times. The _init() code provided by
- subclasses must take special care by itself that multiple executions
- either are harmless or that they are prevented by some kind of flag
- in the XML tree. The latter can be achieved by modifying an
- attribute value or by removing or adding a specific child node and
- then verifying this before running through the init process.
+* It is called at least once on element instantiation time. That is, when a
+ Python representation of the element is created by lxml. At that time, the
+ element object is completely initialized to represent a specific XML element
+ within the tree.
+
+* The method has complete access to the XML tree. Modifications can be done
+ in exactly the same way as anywhere else in the program.
+
+* Python representations of elements may be created multiple times during the
+ lifetime of an XML element in the underlying tree. The ``_init()`` code
+ provided by subclasses must take special care by itself that multiple
+ executions either are harmless or that they are prevented by some kind of
+ flag in the XML tree. The latter can be achieved by modifying an attribute
+ value or by removing or adding a specific child node and then verifying this
+ before running through the init process.
+
+* Any exceptions raised in ``_init()`` will be propagated through the API
+ call that led to the creation of the Element. So be careful with the code
+ you write here as its exceptions may turn up in various unexpected places.
Default implementations
-----------------------
-There is a slight difference between the Namespace example and the
-simple 'is_honking' method above. We associated the HonkElement class
-only with the 'honk' element. If you have other elements in the same
-namespace, they do not pick up the same implementation.
-
-Example::
+In the Namespace example above, we associated the HonkElement class only with
+the 'honk' element. If an XML tree contains different elements in the same
+namespace, they do not pick up the same implementation::
- >>> honk_element = XML(' ')
+ >>> xml = ' '
+ >>> honk_element = etree.XML(xml)
>>> print honk_element.honking
True
>>> print honk_element[0].honking
@@ -122,18 +115,18 @@
AttributeError: 'etree._Element' object has no attribute 'honking'
You can therefore provide one implementation per element name in each
-namespace and have lxml select the right one on the fly. If you want
-one element implementation per namespace (ignoring the element name)
-or prefer having a common class for most elements except a few, you
-can specify a default implementation for an entire namespace by
-registering that class with the empty element name (None).
-
-You may consider following an object oriented approach. If you build
-a class hierarchy of element classes, you can also implement a base
-class for a namespace, that is used if no specific element class is
-provided. Again, you only have to pass None as an element name::
+namespace and have lxml select the right one on the fly. If you want one
+element implementation per namespace (ignoring the element name) or prefer
+having a common class for most elements except a few, you can specify a
+default implementation for an entire namespace by registering that class with
+the empty element name (None).
+
+You may consider following an object oriented approach here. If you build a
+class hierarchy of element classes, you can also implement a base class for a
+namespace that is used if no specific element class is provided. Again, you
+can just pass None as an element name::
- >>> class HonkNSElement(ElementBase):
+ >>> class HonkNSElement(etree.ElementBase):
... def honk(self):
... return "HONK"
>>> namespace[None] = HonkNSElement
@@ -144,9 +137,15 @@
... honking = property(honking)
>>> namespace['honk'] = HonkElement
-Now you can use your new namespace::
+Now you can rely on lxml to always return objects of type HonkNSElement or its
+subclasses for elements of this namespace::
+
+ >>> xml = ' '
+ >>> honk_element = etree.XML(xml)
+
+ >>> print type(honk_element), type(honk_element[0])
+
- >>> honk_element = XML(' ')
>>> print honk_element.honking
True
>>> print honk_element.honk()
From scoder at codespeak.net Tue May 30 10:45:55 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Tue, 30 May 2006 10:45:55 +0200 (CEST)
Subject: [Lxml-checkins] r27895 - lxml/trunk/doc
Message-ID: <20060530084555.D003710057@code0.codespeak.net>
Author: scoder
Date: Tue May 30 10:45:52 2006
New Revision: 27895
Modified:
lxml/trunk/doc/api.txt
Log:
cleanup in doc/api.txt
Modified: lxml/trunk/doc/api.txt
==============================================================================
--- lxml/trunk/doc/api.txt (original)
+++ lxml/trunk/doc/api.txt Tue May 30 10:45:52 2006
@@ -241,8 +241,26 @@
Optionally, you can provide a ``namespaces`` keyword argument, which should be
a dictionary mapping the namespace prefixes used in the XPath expression to
-namespace URIs. The optional ``extensions`` argument is used to define
-`extension functions`_ in Python.
+namespace URIs::
+
+ >>> f = StringIO('''\
+ ...
+ ... Text
+ ...
+ ... ''')
+ >>> doc = etree.parse(f)
+ >>> r = doc.xpath('/t:foo/b:bar', {'t': 'http://codespeak.net/ns/test1',
+ ... 'b': 'http://codespeak.net/ns/test2'})
+ >>> len(r)
+ 1
+ >>> r[0].tag
+ '{http://codespeak.net/ns/test2}bar'
+ >>> r[0].text
+ 'Text'
+
+There is also an optional ``extensions`` argument which is used to define
+`extension functions`_ in Python that are local to this evaluation.
.. _`extension functions`: extensions.html
@@ -261,34 +279,6 @@
contain a comment, the result contains a string as well, inside
``<!-- -->`` markers.
-Example::
-
- >>> f = StringIO(' ')
- >>> doc = etree.parse(f)
- >>> r = doc.xpath('/foo/bar')
- >>> len(r)
- 1
- >>> r[0].tag
- 'bar'
-
-Example of using namespace prefixes::
-
- >>> f = StringIO('''\
- ...
- ... Text
- ...
- ... ''')
- >>> doc = etree.parse(f)
- >>> r = doc.xpath('/t:foo/b:bar', {'t': 'http://codespeak.net/ns/test1',
- ... 'b': 'http://codespeak.net/ns/test2'})
- >>> len(r)
- 1
- >>> r[0].tag
- '{http://codespeak.net/ns/test2}bar'
- >>> r[0].text
- 'Text'
-
A related convenience method of ElementTree objects is ``getpath(element)``,
which returns a structural, absolute XPath expression to find that element::
From scoder at codespeak.net Tue May 30 12:18:06 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Tue, 30 May 2006 12:18:06 +0200 (CEST)
Subject: [Lxml-checkins] r27901 - lxml/trunk/src/lxml/tests
Message-ID: <20060530101806.D3D3110057@code0.codespeak.net>
Author: scoder
Date: Tue May 30 12:18:04 2006
New Revision: 27901
Modified:
lxml/trunk/src/lxml/tests/test_elementtree.py
Log:
clean up in test case
Modified: lxml/trunk/src/lxml/tests/test_elementtree.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_elementtree.py (original)
+++ lxml/trunk/src/lxml/tests/test_elementtree.py Tue May 30 12:18:04 2006
@@ -1669,15 +1669,15 @@
root = Element('element')
- subelement = Element('subelement')
- subelement.set("{http://www.w3.org/XML/1998/namespace}id", "foo")
- self.assertEqual(1, len(subelement.attrib))
+ subelement = Element('subelement',
+ {"{http://www.w3.org/XML/1998/namespace}id": "foo"})
+ self.assertEquals(1, len(subelement.attrib))
self.assertEquals(
"foo",
subelement.get("{http://www.w3.org/XML/1998/namespace}id"))
root.append(subelement)
- self.assertEqual(1, len(subelement.attrib))
+ self.assertEquals(1, len(subelement.attrib))
self.assertEquals({"{http://www.w3.org/XML/1998/namespace}id" : "foo"},
subelement.attrib)
self.assertEquals(
From scoder at codespeak.net Tue May 30 14:18:06 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Tue, 30 May 2006 14:18:06 +0200 (CEST)
Subject: [Lxml-checkins] r27911 - lxml/trunk/src/lxml
Message-ID: <20060530121806.07F3210057@code0.codespeak.net>
Author: scoder
Date: Tue May 30 14:18:04 2006
New Revision: 27911
Modified:
lxml/trunk/src/lxml/apihelpers.pxi
Log:
iterative rewrite of _findDepthFirstInFollowing
Modified: lxml/trunk/src/lxml/apihelpers.pxi
==============================================================================
--- lxml/trunk/src/lxml/apihelpers.pxi (original)
+++ lxml/trunk/src/lxml/apihelpers.pxi Tue May 30 14:18:04 2006
@@ -267,18 +267,35 @@
2) its descendents
3) its following siblings.
"""
- cdef xmlNode* c_child
+ cdef xmlNode* c_next
+ cdef xmlNode* c_start_parent
if c_name is NULL:
# always match
return c_node
+ if c_node is NULL:
+ return NULL
+ c_start_parent = c_node.parent
while c_node is not NULL:
- if _tagMatches(c_node, c_href, c_name):
- return c_node
- if c_node.children is not NULL:
- c_child = _findDepthFirstInFollowing(c_node.children, c_href, c_name)
- if c_child is not NULL:
- return c_child
- c_node = _nextElement(c_node)
+ if _isElement(c_node):
+ if _tagMatches(c_node, c_href, c_name):
+ return c_node
+ # walk through children
+ c_next = c_node.children
+ if c_next is NULL:
+ c_next = _nextElement(c_node)
+ elif not _isElement(c_next):
+ c_next = _nextElement(c_next)
+ if c_next is NULL:
+ c_next = _nextElement(c_node)
+ else:
+ c_next = _nextElement(c_node)
+ # back off through parents
+ while c_next is NULL:
+ c_node = c_node.parent
+ if c_node is c_start_parent:
+ return NULL
+ c_next = _nextElement(c_node)
+ c_node = c_next
return NULL
cdef int _tagMatches(xmlNode* c_node, char* c_href, char* c_name):
From scoder at codespeak.net Tue May 30 14:38:37 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Tue, 30 May 2006 14:38:37 +0200 (CEST)
Subject: [Lxml-checkins] r27915 - lxml/trunk/src/lxml
Message-ID: <20060530123837.4E58810057@code0.codespeak.net>
Author: scoder
Date: Tue May 30 14:38:34 2006
New Revision: 27915
Modified:
lxml/trunk/src/lxml/apihelpers.pxi
Log:
cleanup in _findDepthFirstInFollowing
Modified: lxml/trunk/src/lxml/apihelpers.pxi
==============================================================================
--- lxml/trunk/src/lxml/apihelpers.pxi (original)
+++ lxml/trunk/src/lxml/apihelpers.pxi Tue May 30 14:38:34 2006
@@ -276,19 +276,18 @@
return NULL
c_start_parent = c_node.parent
while c_node is not NULL:
- if _isElement(c_node):
- if _tagMatches(c_node, c_href, c_name):
- return c_node
- # walk through children
- c_next = c_node.children
+ if _tagMatches(c_node, c_href, c_name):
+ return c_node
+ # walk through children
+ c_next = c_node.children
+ if c_next is NULL:
+ # sibling?
+ c_next = _nextElement(c_node)
+ elif not _isElement(c_next):
+ # we need an element
+ c_next = _nextElement(c_next)
if c_next is NULL:
c_next = _nextElement(c_node)
- elif not _isElement(c_next):
- c_next = _nextElement(c_next)
- if c_next is NULL:
- c_next = _nextElement(c_node)
- else:
- c_next = _nextElement(c_node)
# back off through parents
while c_next is NULL:
c_node = c_node.parent
From scoder at codespeak.net Tue May 30 16:12:39 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Tue, 30 May 2006 16:12:39 +0200 (CEST)
Subject: [Lxml-checkins] r27921 - lxml/trunk/src/lxml
Message-ID: <20060530141239.1311110053@code0.codespeak.net>
Author: scoder
Date: Tue May 30 16:12:37 2006
New Revision: 27921
Modified:
lxml/trunk/src/lxml/serializer.pxi
Log:
fixed a memory access bug found by valgrind: xmlOutputBufferClose frees the encoding handler, so we must not call xmlCharEncCloseFunc
Modified: lxml/trunk/src/lxml/serializer.pxi
==============================================================================
--- lxml/trunk/src/lxml/serializer.pxi (original)
+++ lxml/trunk/src/lxml/serializer.pxi Tue May 30 16:12:37 2006
@@ -38,7 +38,6 @@
tree.xmlBufferLength(c_result_buffer))
finally:
tree.xmlOutputBufferClose(c_buffer)
- tree.xmlCharEncCloseFunc(enchandler)
return result
cdef _tounicode(_NodeBase element, int pretty_print):
From scoder at codespeak.net Tue May 30 19:15:15 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Tue, 30 May 2006 19:15:15 +0200 (CEST)
Subject: [Lxml-checkins] r27935 - lxml/trunk/src/lxml
Message-ID: <20060530171515.2528B10057@code0.codespeak.net>
Author: scoder
Date: Tue May 30 19:15:13 2006
New Revision: 27935
Modified:
lxml/trunk/src/lxml/parser.pxi
Log:
cleanup
Modified: lxml/trunk/src/lxml/parser.pxi
==============================================================================
--- lxml/trunk/src/lxml/parser.pxi (original)
+++ lxml/trunk/src/lxml/parser.pxi Tue May 30 19:15:13 2006
@@ -52,12 +52,12 @@
if result.dict is NULL:
result.dict = xmlparser.xmlDictCreate()
self._c_dict = result.dict
- xmlparser.xmlDictReference(result.dict)
+ xmlparser.xmlDictReference(self._c_dict)
elif result.dict != self._c_dict:
if result.dict is not NULL:
xmlparser.xmlDictFree(result.dict)
result.dict = self._c_dict
- xmlparser.xmlDictReference(self._c_dict)
+ xmlparser.xmlDictReference(result.dict)
cdef _ParserContext __GLOBAL_PARSER_CONTEXT
__GLOBAL_PARSER_CONTEXT = _ParserContext()
From scoder at codespeak.net Tue May 30 19:18:34 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Tue, 30 May 2006 19:18:34 +0200 (CEST)
Subject: [Lxml-checkins] r27936 - lxml/trunk/src/lxml
Message-ID: <20060530171834.B2A0410057@code0.codespeak.net>
Author: scoder
Date: Tue May 30 19:18:33 2006
New Revision: 27936
Modified:
lxml/trunk/src/lxml/parser.pxi
Log:
cleanup
Modified: lxml/trunk/src/lxml/parser.pxi
==============================================================================
--- lxml/trunk/src/lxml/parser.pxi (original)
+++ lxml/trunk/src/lxml/parser.pxi Tue May 30 19:18:33 2006
@@ -79,7 +79,7 @@
cdef Py_ssize_t l
cdef char* buffer
cdef char* enc
- utext = unicode(" ")
+ utext = python.PyUnicode_DecodeUTF8(" ", 7, 'strict')
l = python.PyUnicode_GET_DATA_SIZE(utext)
buffer = python.PyUnicode_AS_DATA(utext)
enc = _findEncodingName(buffer, l)
From scoder at codespeak.net Tue May 30 21:25:15 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Tue, 30 May 2006 21:25:15 +0200 (CEST)
Subject: [Lxml-checkins] r27940 - lxml/trunk/src/lxml/tests
Message-ID: <20060530192515.42AC510057@code0.codespeak.net>
Author: scoder
Date: Tue May 30 21:25:03 2006
New Revision: 27940
Modified:
lxml/trunk/src/lxml/tests/test_elementtree.py
Log:
fix test case
Modified: lxml/trunk/src/lxml/tests/test_elementtree.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_elementtree.py (original)
+++ lxml/trunk/src/lxml/tests/test_elementtree.py Tue May 30 21:25:03 2006
@@ -1678,8 +1678,9 @@
root.append(subelement)
self.assertEquals(1, len(subelement.attrib))
- self.assertEquals({"{http://www.w3.org/XML/1998/namespace}id" : "foo"},
- subelement.attrib)
+ self.assertEquals(
+ {"{http://www.w3.org/XML/1998/namespace}id" : "foo"}.items(),
+ subelement.attrib.items())
self.assertEquals(
"foo",
subelement.get("{http://www.w3.org/XML/1998/namespace}id"))
From scoder at codespeak.net Tue May 30 21:25:42 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Tue, 30 May 2006 21:25:42 +0200 (CEST)
Subject: [Lxml-checkins] r27941 - lxml/trunk/src/lxml
Message-ID: <20060530192542.AB54210057@code0.codespeak.net>
Author: scoder
Date: Tue May 30 21:25:41 2006
New Revision: 27941
Modified:
lxml/trunk/src/lxml/apihelpers.pxi
Log:
fixed stupid, stupid bug with namespace reconciliation: free the document /after/ fixing namespaces
Modified: lxml/trunk/src/lxml/apihelpers.pxi
==============================================================================
--- lxml/trunk/src/lxml/apihelpers.pxi (original)
+++ lxml/trunk/src/lxml/apihelpers.pxi Tue May 30 21:25:41 2006
@@ -434,9 +434,9 @@
tree below (including the current node). It also reconciliates
namespaces so they're correct inside the new environment.
"""
+ tree.xmlReconciliateNs(doc._c_doc, node._c_node)
if node._doc is not doc:
changeDocumentBelow(node._c_node, doc)
- tree.xmlReconciliateNs(doc._c_doc, node._c_node)
cdef void changeDocumentBelow(xmlNode* c_node, _Document doc):
"""Update the Python references in the tree below the node.
From scoder at codespeak.net Tue May 30 21:42:32 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Tue, 30 May 2006 21:42:32 +0200 (CEST)
Subject: [Lxml-checkins] r27942 - lxml/trunk
Message-ID: <20060530194232.7F1A91005A@code0.codespeak.net>
Author: scoder
Date: Tue May 30 21:42:31 2006
New Revision: 27942
Modified:
lxml/trunk/CHANGES.txt
Log:
mark namespace bug as fixed
Modified: lxml/trunk/CHANGES.txt
==============================================================================
--- lxml/trunk/CHANGES.txt (original)
+++ lxml/trunk/CHANGES.txt Tue May 30 21:42:31 2006
@@ -24,6 +24,9 @@
Bugs fixed
----------
+* Namespace fixing after moving elements between documents could fail if the
+ source document was freed too early
+
* Setting namespace-less tag names on namespaced elements ('{ns}t' -> 't')
didn't reset the namespace
From scoder at codespeak.net Wed May 31 08:47:52 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Wed, 31 May 2006 08:47:52 +0200 (CEST)
Subject: [Lxml-checkins] r27946 - lxml/trunk/src/lxml
Message-ID: <20060531064752.95B7310060@code0.codespeak.net>
Author: scoder
Date: Wed May 31 08:47:37 2006
New Revision: 27946
Modified:
lxml/trunk/src/lxml/apihelpers.pxi
lxml/trunk/src/lxml/etree.pyx
lxml/trunk/src/lxml/proxy.pxi
Log:
made _Attrib a plain Python object on top of an _Element, allowed for major code cleanup and simplification in proxy code
Modified: lxml/trunk/src/lxml/apihelpers.pxi
==============================================================================
--- lxml/trunk/src/lxml/apihelpers.pxi (original)
+++ lxml/trunk/src/lxml/apihelpers.pxi Wed May 31 08:47:37 2006
@@ -80,7 +80,7 @@
else:
return None
-cdef object _attributeValue(xmlNode* c_element, xmlNode* c_attrib_node):
+cdef object _attributeValue(xmlNode* c_element, xmlAttr* c_attrib_node):
cdef char* value
if c_attrib_node.ns is NULL or c_attrib_node.ns.href is NULL:
value = tree.xmlGetNoNsProp(c_element, c_attrib_node.name)
@@ -425,37 +425,3 @@
if hasattr(source, 'geturl'):
return source.geturl()
return None
-
-cdef void moveNodeToDocument(_NodeBase node, _Document doc):
- """For a node and all nodes below, change document.
-
- A node can change document in certain operations as an XML
- subtree can move. This updates all possible proxies in the
- tree below (including the current node). It also reconciliates
- namespaces so they're correct inside the new environment.
- """
- tree.xmlReconciliateNs(doc._c_doc, node._c_node)
- if node._doc is not doc:
- changeDocumentBelow(node._c_node, doc)
-
-cdef void changeDocumentBelow(xmlNode* c_node, _Document doc):
- """Update the Python references in the tree below the node.
-
- Note that we expect C pointers to the document to be updated already by
- libxml2.
- """
- cdef ProxyRef* ref
- cdef xmlNode* c_current
- cdef _NodeBase proxy
- # adjust all children recursively
- c_current = c_node.children
- while c_current is not NULL:
- changeDocumentBelow(c_current, doc)
- c_current = c_current.next
-
- # adjust Python references of current node
- ref = c_node._private
- while ref is not NULL:
- proxy = <_NodeBase>ref.proxy
- proxy._doc = doc
- ref = ref.next
Modified: lxml/trunk/src/lxml/etree.pyx
==============================================================================
--- lxml/trunk/src/lxml/etree.pyx (original)
+++ lxml/trunk/src/lxml/etree.pyx Wed May 31 08:47:37 2006
@@ -23,10 +23,6 @@
# any non-public function/class is prefixed with an underscore
# instance creation is always through factories
-ctypedef enum LXML_PROXY_TYPE:
- PROXY_ELEMENT
- PROXY_ATTRIB
-
# what to do with libxml2/libxslt error messages?
# 0 : drop
# 1 : use log
@@ -173,7 +169,8 @@
# the document
#print "freeing document:", self._c_doc
#displayNode(self._c_doc, 0)
- #print self._c_doc, self._c_doc.dict is __GLOBAL_PARSER_CONTEXT._c_dict
+ #print self._c_doc, self._c_doc.dict is __GLOBAL_PARSER_CONTEXT._c_dict
+ #print self._c_doc, canDeallocateChildNodes(self._c_doc)
tree.xmlFreeDoc(self._c_doc)
cdef getroot(self):
@@ -338,7 +335,6 @@
"""
cdef _Document _doc
cdef xmlNode* _c_node
- cdef int _proxy_type
def __dealloc__(self):
#print "trying to free node:", self._c_node
@@ -556,7 +552,6 @@
cdef class _Element(_NodeBase):
cdef object _tag
-
def _init(self):
"""Called after object initialisation. Custom subclasses may override
this if they recursively call _init() in the superclasses.
@@ -721,8 +716,11 @@
property attrib:
def __get__(self):
- return _attribFactory(self._doc, self._c_node)
-
+ # do *NOT* keep a reference here to prevent cyclic dependencies
+ # this would free the element in the Cyclic GC, which might let
+ # Python deallocate the document before the element!
+ return _Attrib(self)
+
property text:
def __get__(self):
return _collectText(self._c_node.children)
@@ -962,7 +960,7 @@
cdef _Element _elementFactory(_Document doc, xmlNode* c_node):
cdef _Element result
cdef char* c_ns_href
- result = getProxy(c_node, PROXY_ELEMENT)
+ result = getProxy(c_node)
if result is not None:
return result
if c_node is NULL:
@@ -980,8 +978,7 @@
result = element_class()
result._doc = doc
result._c_node = c_node
- result._proxy_type = PROXY_ELEMENT
- registerProxy(result, PROXY_ELEMENT)
+ registerProxy(result)
result._init()
return result
@@ -1038,7 +1035,7 @@
cdef _Comment _commentFactory(_Document doc, xmlNode* c_node):
cdef _Comment result
- result = getProxy(c_node, PROXY_ELEMENT)
+ result = getProxy(c_node)
if result is not None:
return result
if c_node is NULL:
@@ -1046,24 +1043,29 @@
result = _Comment()
result._doc = doc
result._c_node = c_node
- result._proxy_type = PROXY_ELEMENT
- registerProxy(result, PROXY_ELEMENT)
+ registerProxy(result)
return result
-cdef class _Attrib(_NodeBase):
+cdef class _Attrib:
+ cdef _NodeBase _element
+ def __init__(self, _NodeBase element not None):
+ self._element = element
+
# MANIPULATORS
def __setitem__(self, key, value):
- _setAttributeValue(self, key, value)
+ _setAttributeValue(self._element, key, value)
def __delitem__(self, key):
+ cdef xmlNode* c_node
cdef xmlAttr* c_attr
cdef char* c_tag
ns, tag = _getNsTag(key)
c_tag = _cstr(tag)
+ c_node = self._element._c_node
if ns is None:
- c_attr = tree.xmlHasProp(self._c_node, c_tag)
+ c_attr = tree.xmlHasProp(c_node, c_tag)
else:
- c_attr = tree.xmlHasNsProp(self._c_node, c_tag, _cstr(ns))
+ c_attr = tree.xmlHasNsProp(c_node, c_tag, _cstr(ns))
if c_attr is NULL:
# XXX free namespace that is not in use..?
raise KeyError, key
@@ -1077,43 +1079,46 @@
return repr(result)
def __getitem__(self, key):
- result = _getAttributeValue(self, key, None)
+ result = _getAttributeValue(self._element, key, None)
if result is None:
raise KeyError, key
else:
return result
def __nonzero__(self):
- cdef xmlNode* c_node
- c_node = <xmlNode*>(self._c_node.properties)
- while c_node is not NULL:
- if c_node.type == tree.XML_ATTRIBUTE_NODE:
+ cdef xmlAttr* c_attr
+ c_attr = self._element._c_node.properties
+ while c_attr is not NULL:
+ if c_attr.type == tree.XML_ATTRIBUTE_NODE:
return 1
- c_node = c_node.next
+ c_attr = c_attr.next
return 0
def __len__(self):
+ cdef xmlAttr* c_attr
cdef Py_ssize_t c
- cdef xmlNode* c_node
c = 0
- c_node = <xmlNode*>(self._c_node.properties)
- while c_node is not NULL:
- if c_node.type == tree.XML_ATTRIBUTE_NODE:
+ c_attr = self._element._c_node.properties
+ while c_attr is not NULL:
+ if c_attr.type == tree.XML_ATTRIBUTE_NODE:
c = c + 1
- c_node = c_node.next
+ c_attr = c_attr.next
return c
def get(self, key, default=None):
- return _getAttributeValue(self, key, default)
+ return _getAttributeValue(self._element, key, default)
def keys(self):
- result = []
cdef xmlNode* c_node
- c_node = <xmlNode*>(self._c_node.properties)
- while c_node is not NULL:
- if c_node.type == tree.XML_ATTRIBUTE_NODE:
- python.PyList_Append(result, _namespacedName(c_node))
- c_node = c_node.next
+ cdef xmlAttr* c_attr
+ c_node = self._element._c_node
+ c_attr = c_node.properties
+ result = []
+ while c_attr is not NULL:
+ if c_attr.type == tree.XML_ATTRIBUTE_NODE:
+ python.PyList_Append(
+ result, _namespacedName(c_attr))
+ c_attr = c_attr.next
return result
def __iter__(self):
@@ -1124,13 +1129,15 @@
def values(self):
cdef xmlNode* c_node
+ cdef xmlAttr* c_attr
+ c_node = self._element._c_node
+ c_attr = c_node.properties
result = []
- c_node = <xmlNode*>(self._c_node.properties)
- while c_node is not NULL:
- if c_node.type == tree.XML_ATTRIBUTE_NODE:
+ while c_attr is not NULL:
+ if c_attr.type == tree.XML_ATTRIBUTE_NODE:
python.PyList_Append(
- result, _attributeValue(self._c_node, c_node))
- c_node = c_node.next
+ result, _attributeValue(c_node, c_attr))
+ c_attr = c_attr.next
return result
def itervalues(self):
@@ -1139,14 +1146,16 @@
def items(self):
result = []
cdef xmlNode* c_node
- c_node = <xmlNode*>(self._c_node.properties)
- while c_node is not NULL:
- if c_node.type == tree.XML_ATTRIBUTE_NODE:
+ cdef xmlAttr* c_attr
+ c_node = self._element._c_node
+ c_attr = c_node.properties
+ while c_attr is not NULL:
+ if c_attr.type == tree.XML_ATTRIBUTE_NODE:
python.PyList_Append(result, (
- _namespacedName(c_node),
- _attributeValue(self._c_node, c_node)
+ _namespacedName(c_attr),
+ _attributeValue(c_node, c_attr)
))
- c_node = c_node.next
+ c_attr = c_attr.next
return result
def iteritems(self):
@@ -1159,32 +1168,22 @@
return False
def __contains__(self, key):
+ cdef xmlNode* c_node
cdef char* c_result
cdef char* c_tag
ns, tag = _getNsTag(key)
c_tag = _cstr(tag)
+ c_node = self._element._c_node
if ns is None:
- c_result = tree.xmlGetNoNsProp(self._c_node, c_tag)
+ c_result = tree.xmlGetNoNsProp(c_node, c_tag)
else:
- c_result = tree.xmlGetNsProp(self._c_node, c_tag, _cstr(ns))
+ c_result = tree.xmlGetNsProp(c_node, c_tag, _cstr(ns))
if c_result is NULL:
return 0
else:
tree.xmlFree(c_result)
return 1
-cdef _Attrib _attribFactory(_Document doc, xmlNode* c_node):
- cdef _Attrib result
- result = getProxy(c_node, PROXY_ATTRIB)
- if result is not None:
- return result
- result = _Attrib()
- result._doc = doc
- result._c_node = c_node
- result._proxy_type = PROXY_ATTRIB
- registerProxy(result, PROXY_ATTRIB)
- return result
-
ctypedef xmlNode* (*_node_to_node_function)(xmlNode*)
cdef class ElementChildIterator:
Modified: lxml/trunk/src/lxml/proxy.pxi
==============================================================================
--- lxml/trunk/src/lxml/proxy.pxi (original)
+++ lxml/trunk/src/lxml/proxy.pxi Wed May 31 08:47:37 2006
@@ -4,76 +4,37 @@
# structure of the respective node to avoid multiple instantiation of
# the Python class
-cdef struct _ProxyRef
-
-cdef struct _ProxyRef:
- python.PyObject* proxy
- LXML_PROXY_TYPE type
- _ProxyRef* next
-
-ctypedef _ProxyRef ProxyRef
-
-cdef _NodeBase getProxy(xmlNode* c_node, int proxy_type):
- """Get a proxy for a given node and node type.
+cdef _NodeBase getProxy(xmlNode* c_node):
+ """Get a proxy for a given node.
"""
- cdef ProxyRef* ref
#print "getProxy for:", c_node
- if c_node is NULL:
+ if c_node is not NULL and c_node._private is not NULL:
+ return <_NodeBase>c_node._private
+ else:
return None
- ref = c_node._private
- while ref is not NULL:
- if ref.type == proxy_type:
- return <_NodeBase>ref.proxy
- ref = ref.next
- return None
cdef int hasProxy(xmlNode* c_node):
return c_node._private is not NULL
-cdef void registerProxy(_NodeBase proxy, int proxy_type):
+cdef registerProxy(_NodeBase proxy):
"""Register a proxy and type for the node it's proxying for.
"""
cdef xmlNode* c_node
- cdef ProxyRef* ref
# cannot register for NULL
c_node = proxy._c_node
if c_node is NULL:
return
- # XXX should we check whether we ran into proxy_type before?
#print "registering for:", proxy._c_node
- ref = python.PyMem_Malloc(sizeof(ProxyRef))
- ref.proxy = <python.PyObject*>proxy
- ref.type = proxy_type
- ref.next = c_node._private
- c_node._private = ref # prepend
+ assert c_node._private is NULL, "double registering proxy!"
+ c_node._private = proxy
-cdef void unregisterProxy(_NodeBase proxy):
+cdef unregisterProxy(_NodeBase proxy):
"""Unregister a proxy for the node it's proxying for.
"""
- cdef python.PyObject* proxy_ref
- cdef ProxyRef* ref
- cdef ProxyRef* prev_ref
cdef xmlNode* c_node
- proxy_ref = <python.PyObject*>proxy
c_node = proxy._c_node
- ref = c_node._private
- if ref.proxy == proxy_ref:
- c_node._private = ref.next
- python.PyMem_Free(ref)
- return
- prev_ref = ref
- #print "First registered is:", ref.type
- ref = ref.next
- while ref is not NULL:
- #print "Registered is:", ref.type
- if ref.proxy == proxy_ref:
- prev_ref.next = ref.next
- python.PyMem_Free(ref)
- return
- prev_ref = ref
- ref = ref.next
- #print "Proxy:", proxy, "Proxy type:", proxy_type
- assert 0, "Tried to unregister unknown proxy"
+ assert c_node._private is proxy, "Tried to unregister unknown proxy"
+ c_node._private = NULL
################################################################################
# temporarily make a node the root node of its document
@@ -169,7 +130,7 @@
c_top = c_current
c_current = c_current.parent
# see whether we have children to deallocate
- if canDeallocateChildren(c_top):
+ if canDeallocateChildNodes(c_top):
return c_top
else:
return NULL
@@ -178,38 +139,43 @@
cdef xmlNode* c_current
c_current = c_node.children
while c_current is not NULL:
- if c_current._private is not NULL:
- return 0
- if not canDeallocateChildren(c_current):
- return 0
+ if _isElement(c_current):
+ if c_current._private is not NULL:
+ return 0
+ if not canDeallocateChildNodes(c_current):
+ return 0
c_current = c_current.next
return 1
-cdef int canDeallocateAttributes(xmlNode* c_node):
- cdef xmlAttr* c_current
- c_current = c_node.properties
+################################################################################
+# change _Document references when a node changes documents
+
+cdef void moveNodeToDocument(_NodeBase node, _Document doc):
+ """For a node and all nodes below, change document.
+
+ A node can change document in certain operations as an XML
+ subtree can move. This updates all possible proxies in the
+ tree below (including the current node). It also reconciliates
+ namespaces so they're correct inside the new environment.
+ """
+ tree.xmlReconciliateNs(doc._c_doc, node._c_node)
+ if node._doc is not doc:
+ changeDocumentBelow(node._c_node, doc)
+
+cdef void changeDocumentBelow(xmlNode* c_node, _Document doc):
+ """Update the Python references in the tree below the node.
+
+ Note that we expect C pointers to the document to be updated already by
+ libxml2.
+ """
+ cdef xmlNode* c_current
+ # adjust all children recursively
+ c_current = c_node.children
while c_current is not NULL:
- if c_current._private is not NULL:
- return 0
- # only check child nodes, don't try checking properties as
- # attribute has none
- if not canDeallocateChildNodes(c_current):
- return 0
+ if _isElement(c_current):
+ changeDocumentBelow(c_current, doc)
c_current = c_current.next
- # apparently we can deallocate all subnodes
- return 1
-
-cdef int canDeallocateChildren(xmlNode* c_node):
- # the current implementation is inefficient as it does a
- # tree traversal to find out whether there are any node proxies
- # we could improve this by a smarter datastructure
- # check children
- if not canDeallocateChildNodes(c_node):
- return 0
- # check any attributes
- if (c_node.type == tree.XML_ELEMENT_NODE and
- not canDeallocateAttributes(c_node)):
- return 0
- # apparently we can deallocate all subnodes
- return 1
+ # adjust Python reference of current node
+ if c_node._private is not NULL:
+ (<_NodeBase>c_node._private)._doc = doc
From scoder at codespeak.net Wed May 31 09:38:31 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Wed, 31 May 2006 09:38:31 +0200 (CEST)
Subject: [Lxml-checkins] r27947 - in lxml/trunk: doc src/lxml
Message-ID: <20060531073831.BFE6810053@code0.codespeak.net>
Author: scoder
Date: Wed May 31 09:38:15 2006
New Revision: 27947
Modified:
lxml/trunk/doc/performance.txt
lxml/trunk/src/lxml/apihelpers.pxi
lxml/trunk/src/lxml/etree.h
lxml/trunk/src/lxml/proxy.pxi
lxml/trunk/src/lxml/tree.pxd
Log:
C macro implementation of an iterative tree walker: reduces code duplication between various functions and speeds up tree walking operations by up to 30% (deallocation, iteration, etc.)
Modified: lxml/trunk/doc/performance.txt
==============================================================================
--- lxml/trunk/doc/performance.txt (original)
+++ lxml/trunk/doc/performance.txt Wed May 31 09:38:15 2006
@@ -194,37 +194,37 @@
especially if few elements are of interest or the element tag name is known,
lxml is a good choice::
- lxe: getiterator_all (-- T2 ) 31.2719 msec/pass
+ lxe: getiterator_all (-- T2 ) 23.0440 msec/pass
cET: getiterator_all (-- T2 ) 36.3687 msec/pass
ET : getiterator_all (-- T2 ) 46.2846 msec/pass
- lxe: getiterator_islice (-- T2 ) 2.8503 msec/pass
+ lxe: getiterator_islice (-- T2 ) 2.0699 msec/pass
cET: getiterator_islice (-- T2 ) 0.3299 msec/pass
ET : getiterator_islice (-- T2 ) 44.5898 msec/pass
- lxe: getiterator_tag (-- T2 ) 3.0983 msec/pass
+ lxe: getiterator_tag (-- T2 ) 1.9176 msec/pass
cET: getiterator_tag (-- T2 ) 11.2861 msec/pass
ET : getiterator_tag (-- T2 ) 37.5661 msec/pass
- lxe: getiterator_tag_all (-- T2 ) 4.9760 msec/pass
+ lxe: getiterator_tag_all (-- T2 ) 4.5722 msec/pass
cET: getiterator_tag_all (-- T2 ) 33.2602 msec/pass
ET : getiterator_tag_all (-- T2 ) 37.6200 msec/pass
This similarly shows in ``Element.findall()``::
- lxe: findall (-- T2 ) 36.4730 msec/pass
+ lxe: findall (-- T2 ) 27.3874 msec/pass
cET: findall (-- T2 ) 38.8718 msec/pass
ET : findall (-- T2 ) 50.9692 msec/pass
- lxe: findall (-- T3 ) 4.3956 msec/pass
+ lxe: findall (-- T3 ) 3.8227 msec/pass
cET: findall (-- T3 ) 11.8051 msec/pass
ET : findall (-- T3 ) 11.2570 msec/pass
- lxe: findall_tag (-- T2 ) 4.3950 msec/pass
+ lxe: findall_tag (-- T2 ) 4.5549 msec/pass
cET: findall_tag (-- T2 ) 31.3107 msec/pass
ET : findall_tag (-- T2 ) 36.7813 msec/pass
- lxe: findall_tag (-- T3 ) 0.5946 msec/pass
+ lxe: findall_tag (-- T3 ) 0.5643 msec/pass
cET: findall_tag (-- T3 ) 7.4491 msec/pass
ET : findall_tag (-- T3 ) 9.2943 msec/pass
Modified: lxml/trunk/src/lxml/apihelpers.pxi
==============================================================================
--- lxml/trunk/src/lxml/apihelpers.pxi (original)
+++ lxml/trunk/src/lxml/apihelpers.pxi Wed May 31 09:38:15 2006
@@ -267,34 +267,13 @@
2) its descendents
3) its following siblings.
"""
- cdef xmlNode* c_next
- cdef xmlNode* c_start_parent
if c_name is NULL:
# always match
return c_node
- if c_node is NULL:
- return NULL
- c_start_parent = c_node.parent
- while c_node is not NULL:
- if _tagMatches(c_node, c_href, c_name):
- return c_node
- # walk through children
- c_next = c_node.children
- if c_next is NULL:
- # sibling?
- c_next = _nextElement(c_node)
- elif not _isElement(c_next):
- # we need an element
- c_next = _nextElement(c_next)
- if c_next is NULL:
- c_next = _nextElement(c_node)
- # back off through parents
- while c_next is NULL:
- c_node = c_node.parent
- if c_node is c_start_parent:
- return NULL
- c_next = _nextElement(c_node)
- c_node = c_next
+ tree.BEGIN_FOR_EACH_ELEMENT_FROM(c_node)
+ if _tagMatches(c_node, c_href, c_name):
+ return c_node
+ tree.END_FOR_EACH_ELEMENT_FROM(c_node)
return NULL
cdef int _tagMatches(xmlNode* c_node, char* c_href, char* c_name):
Modified: lxml/trunk/src/lxml/etree.h
==============================================================================
--- lxml/trunk/src/lxml/etree.h (original)
+++ lxml/trunk/src/lxml/etree.h Wed May 31 09:38:15 2006
@@ -1,6 +1,10 @@
#ifndef HAS_ETREE_H
#define HAS_ETREE_H
+/* v_arg functions */
+#define va_int(ap) va_arg(ap, int)
+#define va_charptr(ap) va_arg(ap, char *)
+
/* Py_ssize_t support was added in Python 2.5 */
#if PY_VERSION_HEX < 0x02050000
#ifndef PY_SSIZE_T_MAX /* patched Pyrex? */
@@ -19,12 +23,61 @@
#define str(o) PyObject_Str(o)
#define iter(o) PyObject_GetIter(o)
#define _cstr(s) PyString_AS_STRING(s)
+
#define _isElement(c_node) \
((c_node)->type == XML_ELEMENT_NODE || \
(c_node)->type == XML_COMMENT_NODE)
-/* v_arg functions */
-#define va_int(ap) va_arg(ap, int)
-#define va_charptr(ap) va_arg(ap, char *)
+/* Macro set implementation of a depth first tree walker
+ *
+ * Calls the code block between the BEGIN and END macros
+ * 1) for the start element (or the first 'element' sibling)
+ * 2) for all children (recursively)
+ * 3) all siblings (recursively)
+ *
+ * Usage in Pyrex:
+ * cdef xmlNode* some_node
+ * some_node = parent_node.children
+ * BEGIN_FOR_EACH_ELEMENT_FROM(some_node)
+ * # do something with some_node
+ * END_FOR_EACH_ELEMENT_FROM(some_node)
+ *
+ * NOTE: 'some_node' MUST be a plain 'xmlNode*' !
+ * NOTE: parent modification during the walk will segfault !
+ */
+
+#define BEGIN_FOR_EACH_ELEMENT_FROM(c_node) \
+{ \
+ while ((c_node != 0) && (!_isElement(c_node))) \
+ c_node = c_node->next; \
+ if (c_node != 0) { \
+ xmlNode* ___start_parent = c_node->parent; \
+ xmlNode* ___next; \
+ while (c_node != 0) {
+ /* here goes the code to be run for each element */
+#define END_FOR_EACH_ELEMENT_FROM(c_node) \
+ /* walk through children */ \
+ ___next = c_node->children; \
+ while ((___next != 0) && (!_isElement(___next))) \
+ ___next = ___next->next; \
+ if (___next == 0) { \
+ /* try siblings */ \
+ ___next = c_node->next; \
+ while ((___next != 0) && (!_isElement(___next))) \
+ ___next = ___next->next; \
+ } \
+ /* back off through parents */ \
+ while (___next == 0) { \
+ c_node = c_node->parent; \
+ if (c_node == ___start_parent) \
+ break; \
+ ___next = c_node->next; \
+ while ((___next != 0) && (!_isElement(___next))) \
+ ___next = ___next->next; \
+ } \
+ c_node = ___next; \
+ } \
+ } \
+}
#endif /*HAS_ETREE_H*/
Modified: lxml/trunk/src/lxml/proxy.pxi
==============================================================================
--- lxml/trunk/src/lxml/proxy.pxi (original)
+++ lxml/trunk/src/lxml/proxy.pxi Wed May 31 09:38:15 2006
@@ -136,15 +136,11 @@
return NULL
cdef int canDeallocateChildNodes(xmlNode* c_node):
- cdef xmlNode* c_current
- c_current = c_node.children
- while c_current is not NULL:
- if _isElement(c_current):
- if c_current._private is not NULL:
- return 0
- if not canDeallocateChildNodes(c_current):
- return 0
- c_current = c_current.next
+ c_node = c_node.children
+ tree.BEGIN_FOR_EACH_ELEMENT_FROM(c_node)
+ if c_node._private is not NULL:
+ return 0
+ tree.END_FOR_EACH_ELEMENT_FROM(c_node)
return 1
################################################################################
@@ -160,22 +156,18 @@
"""
tree.xmlReconciliateNs(doc._c_doc, node._c_node)
if node._doc is not doc:
+ node._doc = doc
changeDocumentBelow(node._c_node, doc)
cdef void changeDocumentBelow(xmlNode* c_node, _Document doc):
"""Update the Python references in the tree below the node.
+ Does not update the node itself.
Note that we expect C pointers to the document to be updated already by
libxml2.
"""
- cdef xmlNode* c_current
- # adjust all children recursively
- c_current = c_node.children
- while c_current is not NULL:
- if _isElement(c_current):
- changeDocumentBelow(c_current, doc)
- c_current = c_current.next
-
- # adjust Python reference of current node
+ c_node = c_node.children
+ tree.BEGIN_FOR_EACH_ELEMENT_FROM(c_node)
if c_node._private is not NULL:
(<_NodeBase>c_node._private)._doc = doc
+ tree.END_FOR_EACH_ELEMENT_FROM(c_node)
Modified: lxml/trunk/src/lxml/tree.pxd
==============================================================================
--- lxml/trunk/src/lxml/tree.pxd (original)
+++ lxml/trunk/src/lxml/tree.pxd Wed May 31 09:38:15 2006
@@ -248,3 +248,5 @@
cdef extern from "etree.h":
cdef int _isElement(xmlNode* node)
+ cdef void BEGIN_FOR_EACH_ELEMENT_FROM(xmlNode* node)
+ cdef void END_FOR_EACH_ELEMENT_FROM(xmlNode* node)
From scoder at codespeak.net Wed May 31 09:54:00 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Wed, 31 May 2006 09:54:00 +0200 (CEST)
Subject: [Lxml-checkins] r27948 - lxml/trunk/doc
Message-ID: <20060531075400.4EF5510053@code0.codespeak.net>
Author: scoder
Date: Wed May 31 09:53:48 2006
New Revision: 27948
Modified:
lxml/trunk/doc/performance.txt
Log:
updated benchmark results
Modified: lxml/trunk/doc/performance.txt
==============================================================================
--- lxml/trunk/doc/performance.txt (original)
+++ lxml/trunk/doc/performance.txt Wed May 31 09:53:48 2006
@@ -90,10 +90,10 @@
(given in seconds)::
lxe: -- S- U- -A SA UA
- T1: 0.1360 0.1236 0.1241 0.1243 0.1261 0.1254
- T2: 0.1281 0.1282 0.1299 0.1381 0.1389 0.1395
- T3: 0.0366 0.0300 0.0290 0.0850 0.0851 0.0893
- T4: 0.0010 0.0006 0.0006 0.0018 0.0018 0.0019
+ T1: 0.1360 0.1214 0.1214 0.1217 0.1232 0.1226
+ T2: 0.1258 0.1257 0.1250 0.1348 0.1359 0.1358
+ T3: 0.0354 0.0282 0.0288 0.0850 0.0860 0.0862
+ T4: 0.0006 0.0006 0.0006 0.0019 0.0018 0.0019
cET: -- S- U- -A SA UA
T1: 0.0417 0.0409 0.0403 0.0410 0.0410 0.0415
@@ -125,13 +125,13 @@
are supposed to end up in, either as SubElements of an Element or using the
explicit ``Element.makeelement()`` call::
- lxe: makeelement (-- T2 ) 4.3003 msec/pass
- cET: makeelement (-- T2 ) 0.5520 msec/pass
- ET : makeelement (-- T2 ) 3.8092 msec/pass
-
- lxe: create_subelements (-- T2 ) 3.9673 msec/pass
- cET: create_subelements (-- T2 ) 0.5666 msec/pass
- ET : create_subelements (-- T2 ) 6.4613 msec/pass
+ lxe: makeelement (-- T2 ) 4.2658 msec/pass
+ cET: makeelement (-- T2 ) 0.5658 msec/pass
+ ET : makeelement (-- T2 ) 3.7136 msec/pass
+
+ lxe: create_subelements (-- T2 ) 3.7640 msec/pass
+ cET: create_subelements (-- T2 ) 0.5332 msec/pass
+ ET : create_subelements (-- T2 ) 6.5937 msec/pass
So, if the main performance bottleneck of an application is creating large XML
trees in memory through calls to Element and SubElement, cET is the best
@@ -148,9 +148,9 @@
cET: append_from_document (-- T1,T2) 0.4673 msec/pass
ET : append_from_document (-- T1,T2) 2.0460 msec/pass
- lxe: append_from_document (-- T3,T4) 0.2017 msec/pass
- cET: append_from_document (-- T3,T4) 0.0227 msec/pass
- ET : append_from_document (-- T3,T4) 0.1563 msec/pass
+ lxe: append_from_document (-- T3,T4) 0.1582 msec/pass
+ cET: append_from_document (-- T3,T4) 0.0224 msec/pass
+ ET : append_from_document (-- T3,T4) 0.1618 msec/pass
Although these are fairly small numbers compared to parsing, this easily shows
the different performance classes for lxml and (c)ET. Where the latter do not
@@ -161,9 +161,9 @@
This difference is not always as visible, but applies to most parts of the
API, like inserting newly created elements::
- lxe: insert_from_document (-- T1,T2) 16.4772 msec/pass
- cET: insert_from_document (-- T1,T2) 1.1874 msec/pass
- ET : insert_from_document (-- T1,T2) 3.5447 msec/pass
+ lxe: insert_from_document (-- T1,T2) 16.2342 msec/pass
+ cET: insert_from_document (-- T1,T2) 1.1786 msec/pass
+ ET : insert_from_document (-- T1,T2) 3.6107 msec/pass
Or replacing the child slice by a new element::
From scoder at codespeak.net Wed May 31 09:55:04 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Wed, 31 May 2006 09:55:04 +0200 (CEST)
Subject: [Lxml-checkins] r27949 - lxml/trunk/doc
Message-ID: <20060531075504.1DD7C10053@code0.codespeak.net>
Author: scoder
Date: Wed May 31 09:54:52 2006
New Revision: 27949
Modified:
lxml/trunk/doc/performance.txt
Log:
doc updates
Modified: lxml/trunk/doc/performance.txt
==============================================================================
--- lxml/trunk/doc/performance.txt (original)
+++ lxml/trunk/doc/performance.txt Wed May 31 09:54:52 2006
@@ -13,8 +13,8 @@
The statements made here are backed by the benchmark script `bench.py`_ that
comes with the lxml source distribution. The timings cited below compare lxml
-1.0, ElementTree 1.2.6 and cElementTree 1.0.5 under CPython 2.4.2 on an AMD64
-machine.
+1.0, ElementTree 1.2.6 and cElementTree 1.0.5 under CPython 2.4.2 on a 1.6GHz
+AMD64 machine.
.. _`bench.py`: http://codespeak.net/svn/lxml/trunk/bench.py
From scoder at codespeak.net Wed May 31 10:35:29 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Wed, 31 May 2006 10:35:29 +0200 (CEST)
Subject: [Lxml-checkins] r27951 - lxml/trunk/src/lxml
Message-ID: <20060531083529.BA1491005A@code0.codespeak.net>
Author: scoder
Date: Wed May 31 10:35:28 2006
New Revision: 27951
Modified:
lxml/trunk/src/lxml/apihelpers.pxi
Log:
cleanup
Modified: lxml/trunk/src/lxml/apihelpers.pxi
==============================================================================
--- lxml/trunk/src/lxml/apihelpers.pxi (original)
+++ lxml/trunk/src/lxml/apihelpers.pxi Wed May 31 10:35:28 2006
@@ -249,16 +249,13 @@
c_node = c_node.children
if c_node is NULL:
return NULL
- if not _isElement(c_node):
- c_node = _nextElement(c_node)
return _findDepthFirstInFollowing(c_node, c_href, c_name)
cdef xmlNode* _findDepthFirstInFollowingSiblings(xmlNode* c_node,
char* c_href, char* c_name):
if c_node is NULL:
return NULL
- c_node = _nextElement(c_node)
- return _findDepthFirstInFollowing(c_node, c_href, c_name)
+ return _findDepthFirstInFollowing(c_node.next, c_href, c_name)
cdef xmlNode* _findDepthFirstInFollowing(xmlNode* c_node,
char* c_href, char* c_name):
@@ -267,9 +264,6 @@
2) its descendents
3) its following siblings.
"""
- if c_name is NULL:
- # always match
- return c_node
tree.BEGIN_FOR_EACH_ELEMENT_FROM(c_node)
if _tagMatches(c_node, c_href, c_name):
return c_node
From scoder at codespeak.net Wed May 31 10:38:27 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Wed, 31 May 2006 10:38:27 +0200 (CEST)
Subject: [Lxml-checkins] r27952 - lxml/trunk
Message-ID: <20060531083827.9A46F1005A@code0.codespeak.net>
Author: scoder
Date: Wed May 31 10:38:25 2006
New Revision: 27952
Modified:
lxml/trunk/CHANGES.txt
Log:
updated CHANGES.txt for speedups
Modified: lxml/trunk/CHANGES.txt
==============================================================================
--- lxml/trunk/CHANGES.txt (original)
+++ lxml/trunk/CHANGES.txt Wed May 31 10:38:25 2006
@@ -7,6 +7,10 @@
Features added
--------------
+* Another speedup in tree iteration code
+
+* General speedup of Python Element object creation and deallocation
+
* Writing C14N no longer serializes in memory (reduced memory footprint)
* PyErrorLog for error logging through the Python ``logging`` module
From scoder at codespeak.net Wed May 31 15:17:09 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Wed, 31 May 2006 15:17:09 +0200 (CEST)
Subject: [Lxml-checkins] r27980 - in lxml/trunk: . doc src/lxml src/lxml/tests
Message-ID: <20060531131709.7F3401005A@code0.codespeak.net>
Author: scoder
Date: Wed May 31 15:17:05 2006
New Revision: 27980
Modified:
lxml/trunk/CHANGES.txt
lxml/trunk/doc/performance.txt
lxml/trunk/src/lxml/apihelpers.pxi
lxml/trunk/src/lxml/etree.h
lxml/trunk/src/lxml/etree.pyx
lxml/trunk/src/lxml/proxy.pxi
lxml/trunk/src/lxml/tests/test_etree.py
lxml/trunk/src/lxml/tree.pxd
Log:
generalized tree walker to merge code also with ElementDepthFirstIterator, support '{ns}*' in filter
Modified: lxml/trunk/CHANGES.txt
==============================================================================
--- lxml/trunk/CHANGES.txt (original)
+++ lxml/trunk/CHANGES.txt Wed May 31 15:17:05 2006
@@ -7,6 +7,9 @@
Features added
--------------
+* Element.getiterator() supports iterating through namespaces with the tag
+ expression '{namespace}*'
+
* Another speedup in tree iteration code
* General speedup of Python Element object creation and deallocation
@@ -28,6 +31,8 @@
Bugs fixed
----------
+* Element now raises ValueError on empty tag names
+
* Namespace fixing after moving elements between documents could fail if the
source document was freed too early
Modified: lxml/trunk/doc/performance.txt
==============================================================================
--- lxml/trunk/doc/performance.txt (original)
+++ lxml/trunk/doc/performance.txt Wed May 31 15:17:05 2006
@@ -194,38 +194,38 @@
especially if few elements are of interest or the element tag name is known,
lxml is a good choice::
- lxe: getiterator_all (-- T2 ) 23.0440 msec/pass
- cET: getiterator_all (-- T2 ) 36.3687 msec/pass
+ lxe: getiterator_all (-- T2 ) 22.5847 msec/pass
+ cET: getiterator_all (-- T2 ) 36.8212 msec/pass
ET : getiterator_all (-- T2 ) 46.2846 msec/pass
- lxe: getiterator_islice (-- T2 ) 2.0699 msec/pass
- cET: getiterator_islice (-- T2 ) 0.3299 msec/pass
+ lxe: getiterator_islice (-- T2 ) 2.0421 msec/pass
+ cET: getiterator_islice (-- T2 ) 0.3343 msec/pass
ET : getiterator_islice (-- T2 ) 44.5898 msec/pass
- lxe: getiterator_tag (-- T2 ) 1.9176 msec/pass
- cET: getiterator_tag (-- T2 ) 11.2861 msec/pass
+ lxe: getiterator_tag (-- T2 ) 1.9593 msec/pass
+ cET: getiterator_tag (-- T2 ) 11.7767 msec/pass
ET : getiterator_tag (-- T2 ) 37.5661 msec/pass
- lxe: getiterator_tag_all (-- T2 ) 4.5722 msec/pass
- cET: getiterator_tag_all (-- T2 ) 33.2602 msec/pass
+ lxe: getiterator_tag_all (-- T2 ) 4.5667 msec/pass
+ cET: getiterator_tag_all (-- T2 ) 33.5681 msec/pass
ET : getiterator_tag_all (-- T2 ) 37.6200 msec/pass
This similarly shows in ``Element.findall()``::
- lxe: findall (-- T2 ) 27.3874 msec/pass
- cET: findall (-- T2 ) 38.8718 msec/pass
+ lxe: findall (-- T2 ) 26.9907 msec/pass
+ cET: findall (-- T2 ) 39.1728 msec/pass
ET : findall (-- T2 ) 50.9692 msec/pass
- lxe: findall (-- T3 ) 3.8227 msec/pass
- cET: findall (-- T3 ) 11.8051 msec/pass
+ lxe: findall (-- T3 ) 3.6452 msec/pass
+ cET: findall (-- T3 ) 12.0210 msec/pass
ET : findall (-- T3 ) 11.2570 msec/pass
- lxe: findall_tag (-- T2 ) 4.5549 msec/pass
- cET: findall_tag (-- T2 ) 31.3107 msec/pass
+ lxe: findall_tag (-- T2 ) 4.6065 msec/pass
+ cET: findall_tag (-- T2 ) 34.0267 msec/pass
ET : findall_tag (-- T2 ) 36.7813 msec/pass
- lxe: findall_tag (-- T3 ) 0.5643 msec/pass
- cET: findall_tag (-- T3 ) 7.4491 msec/pass
+ lxe: findall_tag (-- T3 ) 0.5884 msec/pass
+ cET: findall_tag (-- T3 ) 7.6307 msec/pass
ET : findall_tag (-- T3 ) 9.2943 msec/pass
Note that all three libraries currently use the same Python implementation for
Modified: lxml/trunk/src/lxml/apihelpers.pxi
==============================================================================
--- lxml/trunk/src/lxml/apihelpers.pxi (original)
+++ lxml/trunk/src/lxml/apihelpers.pxi Wed May 31 15:17:05 2006
@@ -242,39 +242,16 @@
c_node = c_node.prev
return NULL
-cdef xmlNode* _findDepthFirstInDescendents(xmlNode* c_node,
- char* c_href, char* c_name):
- if c_node is NULL:
- return NULL
- c_node = c_node.children
- if c_node is NULL:
- return NULL
- return _findDepthFirstInFollowing(c_node, c_href, c_name)
-
-cdef xmlNode* _findDepthFirstInFollowingSiblings(xmlNode* c_node,
- char* c_href, char* c_name):
- if c_node is NULL:
- return NULL
- return _findDepthFirstInFollowing(c_node.next, c_href, c_name)
-
-cdef xmlNode* _findDepthFirstInFollowing(xmlNode* c_node,
- char* c_href, char* c_name):
- """Find the next matching node by traversing:
- 1) the node itself
- 2) its descendents
- 3) its following siblings.
- """
- tree.BEGIN_FOR_EACH_ELEMENT_FROM(c_node)
- if _tagMatches(c_node, c_href, c_name):
- return c_node
- tree.END_FOR_EACH_ELEMENT_FROM(c_node)
- return NULL
-
cdef int _tagMatches(xmlNode* c_node, char* c_href, char* c_name):
if c_name is NULL:
- # always match
- return 1
- if c_href is NULL:
+ if c_href is NULL:
+ # always match
+ return 1
+ elif c_node.ns is NULL or c_node.ns.href is NULL:
+ return 0
+ else:
+ return cstd.strcmp(c_node.ns.href, c_href) == 0
+ elif c_href is NULL:
if c_node.ns is not NULL and c_node.ns.href is not NULL:
return 0
return cstd.strcmp(c_node.name, c_name) == 0
@@ -363,10 +340,11 @@
raise ValueError, "Invalid tag name"
nslen = c_ns_end - c_tag
taglen = python.PyString_GET_SIZE(tag) - nslen - 2
- ns = python.PyString_FromStringAndSize(c_tag, nslen)
+ if taglen == 0:
+ raise ValueError, "Empty tag name"
+ if nslen > 0:
+ ns = python.PyString_FromStringAndSize(c_tag, nslen)
tag = python.PyString_FromStringAndSize(c_ns_end+1, taglen)
- else:
- ns = None
return ns, tag
cdef object _namespacedName(xmlNode* c_node):
Modified: lxml/trunk/src/lxml/etree.h
==============================================================================
--- lxml/trunk/src/lxml/etree.h (original)
+++ lxml/trunk/src/lxml/etree.h Wed May 31 15:17:05 2006
@@ -28,56 +28,109 @@
((c_node)->type == XML_ELEMENT_NODE || \
(c_node)->type == XML_COMMENT_NODE)
-/* Macro set implementation of a depth first tree walker
+/* Macro pair implementation of a depth first tree walker
*
- * Calls the code block between the BEGIN and END macros
- * 1) for the start element (or the first 'element' sibling)
- * 2) for all children (recursively)
- * 3) all siblings (recursively)
+ * Calls the code block between the BEGIN and END macros for all elements
+ * below c_tree_top (exclusively), starting at c_node (inclusively iff
+ * 'inclusive' is 1).
+ *
+ * To traverse the node and all of its children and siblings in Pyrex, call
+ * cdef xmlNode* some_node
+ * BEGIN_FOR_EACH_ELEMENT_FROM(some_node.parent, some_node, 1)
+ * # do something with some_node
+ * END_FOR_EACH_ELEMENT_FROM(some_node)
*
- * Usage in Pyrex:
+ * To traverse only the children and siblings of a node, call
+ * cdef xmlNode* some_node
+ * BEGIN_FOR_EACH_ELEMENT_FROM(some_node.parent, some_node, 0)
+ * # do something with some_node
+ * END_FOR_EACH_ELEMENT_FROM(some_node)
+ *
+ * To traverse only the children, do:
* cdef xmlNode* some_node
* some_node = parent_node.children
- * BEGIN_FOR_EACH_ELEMENT_FROM(some_node)
+ * BEGIN_FOR_EACH_ELEMENT_FROM(parent_node, some_node, 1)
* # do something with some_node
* END_FOR_EACH_ELEMENT_FROM(some_node)
*
* NOTE: 'some_node' MUST be a plain 'xmlNode*' !
- * NOTE: parent modification during the walk will segfault !
+ *
+ * NOTE: parent modification during the walk can divert the iterator, but
+ * should not segfault !
*/
-#define BEGIN_FOR_EACH_ELEMENT_FROM(c_node) \
-{ \
- while ((c_node != 0) && (!_isElement(c_node))) \
- c_node = c_node->next; \
- if (c_node != 0) { \
- xmlNode* ___start_parent = c_node->parent; \
- xmlNode* ___next; \
- while (c_node != 0) {
+#define ADVANCE_TO_NEXT_ELEMENT(c_node) \
+ while ((c_node != 0) && (!_isElement(c_node))) \
+ c_node = c_node->next;
+
+#define BEGIN_FOR_EACH_ELEMENT_FROM(c_tree_top, c_node, inclusive) \
+{ \
+ xmlNode* ___next; \
+ const xmlNode* ___tree_top = (c_tree_top); \
+ /* make sure we have an element or NULL */ \
+ if (c_node != 0) { \
+ if (!_isElement(c_node)) { \
+ /* we skip the node, so 'inclusive' is irrelevant */ \
+ if (c_node == ___tree_top) \
+ c_node = 0; /* nothing to traverse */ \
+ else { \
+ c_node = c_node->next; \
+ ADVANCE_TO_NEXT_ELEMENT(c_node) \
+ } \
+ } else if (! (inclusive)) { \
+ /* duplicated for speed: find the second node */ \
+ /* walk through children */ \
+ ___next = c_node->children; \
+ ADVANCE_TO_NEXT_ELEMENT(___next) \
+ if ((___next == 0) && (c_node != ___tree_top)) { \
+ /* try siblings */ \
+ ___next = c_node->next; \
+ ADVANCE_TO_NEXT_ELEMENT(___next) \
+ /* back off through parents */ \
+ while (___next == 0) { \
+ c_node = c_node->parent; \
+ if (c_node == 0) \
+ break; \
+ if (c_node == ___tree_top) \
+ break; \
+ if (!_isElement(c_node)) \
+ break; \
+ ___next = c_node->next; \
+ ADVANCE_TO_NEXT_ELEMENT(___next) \
+ } \
+ } \
+ c_node = ___next; \
+ } \
+ \
+ /* now run the user code on the elements we find */ \
+ while (c_node != 0) { \
/* here goes the code to be run for each element */
-#define END_FOR_EACH_ELEMENT_FROM(c_node) \
- /* walk through children */ \
- ___next = c_node->children; \
- while ((___next != 0) && (!_isElement(___next))) \
- ___next = ___next->next; \
- if (___next == 0) { \
- /* try siblings */ \
- ___next = c_node->next; \
- while ((___next != 0) && (!_isElement(___next))) \
- ___next = ___next->next; \
- } \
- /* back off through parents */ \
- while (___next == 0) { \
- c_node = c_node->parent; \
- if (c_node == ___start_parent) \
- break; \
- ___next = c_node->next; \
- while ((___next != 0) && (!_isElement(___next))) \
- ___next = ___next->next; \
- } \
- c_node = ___next; \
- } \
- } \
+
+#define END_FOR_EACH_ELEMENT_FROM(c_node) \
+ /* walk through children */ \
+ ___next = c_node->children; \
+ ADVANCE_TO_NEXT_ELEMENT(___next) \
+ if ((___next == 0) && (c_node != ___tree_top)) { \
+ /* try siblings */ \
+ ___next = c_node->next; \
+ ADVANCE_TO_NEXT_ELEMENT(___next) \
+ /* back off through parents */ \
+ while (___next == 0) { \
+ c_node = c_node->parent; \
+ if (c_node == 0) \
+ break; \
+ if (c_node == ___tree_top) \
+ break; \
+ if (!_isElement(c_node)) \
+ break; \
+ ___next = c_node->next; \
+ ADVANCE_TO_NEXT_ELEMENT(___next) \
+ } \
+ } \
+ c_node = ___next; \
+ } \
+ } \
}
+
#endif /*HAS_ETREE_H*/
Modified: lxml/trunk/src/lxml/etree.pyx
==============================================================================
--- lxml/trunk/src/lxml/etree.pyx (original)
+++ lxml/trunk/src/lxml/etree.pyx Wed May 31 15:17:05 2006
@@ -1230,15 +1230,13 @@
# we keep Python references here to control GC
# keep next node to return and a depth counter in the tree
cdef _NodeBase _next_node
- cdef Py_ssize_t _depth
+ cdef _NodeBase _top_node
cdef object _pystrings
cdef char* _href
cdef char* _name
def __init__(self, _NodeBase node not None, tag=None):
+ self._top_node = node
self._next_node = node
- self._depth = 0
- if tag == '*':
- tag = None
if tag is None:
self._href = NULL
self._name = NULL
@@ -1249,10 +1247,11 @@
else:
self._href = _cstr(self._pystrings[0])
self._name = _cstr(self._pystrings[1])
-
- if not _tagMatches(node._c_node, self._href, self._name):
- # this cannot raise StopIteration, self._next_node != None
- self.next()
+ if cstd.strcmp(self._name, '*') == 0:
+ self._name = NULL
+ if not _tagMatches(node._c_node, self._href, self._name):
+ # this cannot raise StopIteration, self._next_node != None
+ self.next()
def __iter__(self):
return self
@@ -1262,48 +1261,30 @@
current_node = self._next_node
if current_node is None:
raise StopIteration
- self._prepareNextNode()
+ if self._name is NULL and self._href is NULL:
+ self._prepareNextNodeAnyTag()
+ else:
+ self._prepareNextNodeMatchTag()
return current_node
- cdef void _prepareNextNode(self):
- cdef _NodeBase node
+ cdef void _prepareNextNodeAnyTag(self):
+ cdef xmlNode* c_node
+ c_node = self._next_node._c_node
+ tree.BEGIN_FOR_EACH_ELEMENT_FROM(self._top_node._c_node, c_node, 0)
+ self._next_node = _elementFactory(self._next_node._doc, c_node)
+ return
+ tree.END_FOR_EACH_ELEMENT_FROM(c_node)
+ self._next_node = None
+
+ cdef void _prepareNextNodeMatchTag(self):
cdef xmlNode* c_node
- cdef xmlNode* c_next_node
- cdef xmlNode* c_parent
- # find in descendants
- node = self._next_node
- c_parent = node._c_node
- c_node = _findDepthFirstInDescendents(c_parent, self._href, self._name)
- if c_node is NULL:
- if self._depth < 1:
- # nothing left to traverse
- self._next_node = None
- return
- # try siblings
- c_node = _findDepthFirstInFollowingSiblings(
- c_parent, self._href, self._name)
-
- while c_node is NULL and self._depth > 1:
- # walk up the parent pointers and continue with their siblings
- c_parent = c_parent.parent
- self._depth = self._depth - 1
- if c_parent is NULL or not _isElement(c_parent):
- break
- c_node = _findDepthFirstInFollowingSiblings(
- c_parent, self._href, self._name)
-
- if c_node is NULL or not _isElement(c_parent):
- self._next_node = None
- return # all found, nothing left
- # we are at a sibling, so set c_parent to our parent
- c_parent = c_parent.parent
-
- c_next_node = c_node
- # fix depth counter by looking up path to original parent
- while c_node is not c_parent:
- self._depth = self._depth + 1
- c_node = c_node.parent
- self._next_node = _elementFactory(node._doc, c_next_node)
+ c_node = self._next_node._c_node
+ tree.BEGIN_FOR_EACH_ELEMENT_FROM(self._top_node._c_node, c_node, 0)
+ if _tagMatches(c_node, self._href, self._name):
+ self._next_node = _elementFactory(self._next_node._doc, c_node)
+ return
+ tree.END_FOR_EACH_ELEMENT_FROM(c_node)
+ self._next_node = None
cdef xmlNode* _createElement(xmlDoc* c_doc, object name_utf) except NULL:
cdef xmlNode* c_node
Modified: lxml/trunk/src/lxml/proxy.pxi
==============================================================================
--- lxml/trunk/src/lxml/proxy.pxi (original)
+++ lxml/trunk/src/lxml/proxy.pxi Wed May 31 15:17:05 2006
@@ -135,9 +135,10 @@
else:
return NULL
-cdef int canDeallocateChildNodes(xmlNode* c_node):
- c_node = c_node.children
- tree.BEGIN_FOR_EACH_ELEMENT_FROM(c_node)
+cdef int canDeallocateChildNodes(xmlNode* c_parent):
+ cdef xmlNode* c_node
+ c_node = c_parent.children
+ tree.BEGIN_FOR_EACH_ELEMENT_FROM(c_parent, c_node, 1)
if c_node._private is not NULL:
return 0
tree.END_FOR_EACH_ELEMENT_FROM(c_node)
@@ -159,15 +160,16 @@
node._doc = doc
changeDocumentBelow(node._c_node, doc)
-cdef void changeDocumentBelow(xmlNode* c_node, _Document doc):
+cdef void changeDocumentBelow(xmlNode* c_parent, _Document doc):
"""Update the Python references in the tree below the node.
Does not update the node itself.
Note that we expect C pointers to the document to be updated already by
libxml2.
"""
- c_node = c_node.children
- tree.BEGIN_FOR_EACH_ELEMENT_FROM(c_node)
+ cdef xmlNode* c_node
+ c_node = c_parent.children
+ tree.BEGIN_FOR_EACH_ELEMENT_FROM(c_parent, c_node, 1)
if c_node._private is not NULL:
(<_NodeBase>c_node._private)._doc = doc
tree.END_FOR_EACH_ELEMENT_FROM(c_node)
Modified: lxml/trunk/src/lxml/tests/test_etree.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_etree.py (original)
+++ lxml/trunk/src/lxml/tests/test_etree.py Wed May 31 15:17:05 2006
@@ -28,6 +28,16 @@
self.assert_(etree.__version__.startswith(
str(etree.LXML_VERSION[0])))
+ def test_element_names(self):
+ Element = self.etree.Element
+
+ el = Element('name')
+ self.assertEquals(el.tag, 'name')
+ el = Element('{}name')
+ self.assertEquals(el.tag, 'name')
+ self.assertRaises(ValueError, Element, '{test}')
+ self.assertRaises(ValueError, setattr, el, 'tag', '{test}')
+
def test_parse_error(self):
parse = self.etree.parse
# from StringIO
@@ -436,6 +446,33 @@
' ',
self._writeElement(e))
+ def test_getiterator_filter_namespace(self):
+ Element = self.etree.Element
+ SubElement = self.etree.SubElement
+
+ a = Element('{a}a')
+ b = SubElement(a, '{a}b')
+ c = SubElement(a, '{a}c')
+ d = SubElement(b, '{b}d')
+ e = SubElement(c, '{a}e')
+ f = SubElement(c, '{b}f')
+
+ self.assertEquals(
+ [a],
+ list(a.getiterator('{a}a')))
+ self.assertEquals(
+ [],
+ list(a.getiterator('{b}a')))
+ self.assertEquals(
+ [],
+ list(a.getiterator('a')))
+ self.assertEquals(
+ [f],
+ list(c.getiterator('{b}*')))
+ self.assertEquals(
+ [d, f],
+ list(a.getiterator('{b}*')))
+
def test_index(self):
etree = self.etree
e = etree.Element('foo')
Modified: lxml/trunk/src/lxml/tree.pxd
==============================================================================
--- lxml/trunk/src/lxml/tree.pxd (original)
+++ lxml/trunk/src/lxml/tree.pxd Wed May 31 15:17:05 2006
@@ -248,5 +248,6 @@
cdef extern from "etree.h":
cdef int _isElement(xmlNode* node)
- cdef void BEGIN_FOR_EACH_ELEMENT_FROM(xmlNode* node)
- cdef void END_FOR_EACH_ELEMENT_FROM(xmlNode* node)
+ cdef void BEGIN_FOR_EACH_ELEMENT_FROM(xmlNode* tree_top,
+ xmlNode* start_node, int inclusive)
+ cdef void END_FOR_EACH_ELEMENT_FROM(xmlNode* start_node)
From scoder at codespeak.net Wed May 31 15:57:12 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Wed, 31 May 2006 15:57:12 +0200 (CEST)
Subject: [Lxml-checkins] r27989 - lxml/www
Message-ID: <20060531135712.42A6110060@code0.codespeak.net>
Author: scoder
Date: Wed May 31 15:57:10 2006
New Revision: 27989
Modified:
lxml/www/publish.py
Log:
new doc files: performance.txt, resolvers.txt
Modified: lxml/www/publish.py
==============================================================================
--- lxml/www/publish.py (original)
+++ lxml/www/publish.py Wed May 31 15:57:10 2006
@@ -9,7 +9,7 @@
for name in ['main.txt', 'intro.txt', 'api.txt', 'compatibility.txt',
'extensions.txt', 'namespace_extensions.txt', 'sax.txt',
- 'build.txt']:
+ 'build.txt', 'performance.txt', 'resolvers.txt']:
path = os.path.join(lxml_path, 'doc', name)
outname = os.path.splitext(name)[0] + '.html'
outpath = os.path.join(dirname, outname)
From scoder at codespeak.net Wed May 31 16:00:36 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Wed, 31 May 2006 16:00:36 +0200 (CEST)
Subject: [Lxml-checkins] r27990 - lxml/trunk
Message-ID: <20060531140036.6F54510060@code0.codespeak.net>
Author: scoder
Date: Wed May 31 16:00:35 2006
New Revision: 27990
Modified:
lxml/trunk/CHANGES.txt
lxml/trunk/version.txt
Log:
rest fixes and version bump to 1.0
Modified: lxml/trunk/CHANGES.txt
==============================================================================
--- lxml/trunk/CHANGES.txt (original)
+++ lxml/trunk/CHANGES.txt Wed May 31 16:00:35 2006
@@ -1,14 +1,14 @@
lxml changelog
==============
-current
-=======
+1.0 (2006-06-01)
+================
Features added
--------------
* Element.getiterator() supports iterating through namespaces with the tag
- expression '{namespace}*'
+ expression ``{namespace}*``
* Another speedup in tree iteration code
@@ -53,7 +53,7 @@
* Running absolute XPath expressions on an Element now evaluates against the
root tree
-* Evaluating absolute XPath expressions ('/*') on an ElementTree could fail
+* Evaluating absolute XPath expressions (``/*``) on an ElementTree could fail
* Crashes when calling XSLT, RelaxNG, etc. with uninitialized ElementTree
objects
Modified: lxml/trunk/version.txt
==============================================================================
--- lxml/trunk/version.txt (original)
+++ lxml/trunk/version.txt Wed May 31 16:00:35 2006
@@ -1 +1 @@
-1.0.beta
+1.0
From scoder at codespeak.net Wed May 31 18:25:17 2006
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Wed, 31 May 2006 18:25:17 +0200 (CEST)
Subject: [Lxml-checkins] r28019 - in lxml/trunk: . src/lxml/tests
Message-ID: <20060531162517.1AD9A1005A@code0.codespeak.net>
Author: scoder
Date: Wed May 31 18:25:15 2006
New Revision: 28019
Modified:
lxml/trunk/CHANGES.txt
lxml/trunk/src/lxml/tests/test_etree.py
Log:
test case for '{namespace}*' pattern in findall()
Modified: lxml/trunk/CHANGES.txt
==============================================================================
--- lxml/trunk/CHANGES.txt (original)
+++ lxml/trunk/CHANGES.txt Wed May 31 18:25:15 2006
@@ -7,8 +7,8 @@
Features added
--------------
-* Element.getiterator() supports iterating through namespaces with the tag
- expression ``{namespace}*``
+* Element.getiterator() and the findall() methods support finding arbitrary
+ elements from a namespace (pattern ``{namespace}*``)
* Another speedup in tree iteration code
Modified: lxml/trunk/src/lxml/tests/test_etree.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_etree.py (original)
+++ lxml/trunk/src/lxml/tests/test_etree.py Wed May 31 18:25:15 2006
@@ -473,6 +473,13 @@
[d, f],
list(a.getiterator('{b}*')))
+ def test_findall_ns(self):
+ XML = self.etree.XML
+ root = XML('<a xmlns:x="X" xmlns:y="Y"><x:b><c/></x:b><b/><c><x:b/><b/></c><b/></a>')
+ self.assertEquals(len(root.findall(".//{X}b")), 2)
+ self.assertEquals(len(root.findall(".//{X}*")), 2)
+ self.assertEquals(len(root.findall(".//b")), 3)
+
def test_index(self):
etree = self.etree
e = etree.Element('foo')