From scoder at codespeak.net Sat Sep 1 10:45:07 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Sat, 1 Sep 2007 10:45:07 +0200 (CEST)
Subject: [Lxml-checkins] r46231 - lxml/trunk/doc
Message-ID: <20070901084507.9E7F281B0@code0.codespeak.net>
Author: scoder
Date: Sat Sep 1 10:45:04 2007
New Revision: 46231
Modified:
lxml/trunk/doc/objectify.txt
Log:
fixed test cases to reflect annotation in objectify.E factory
Modified: lxml/trunk/doc/objectify.txt
==============================================================================
--- lxml/trunk/doc/objectify.txt (original)
+++ lxml/trunk/doc/objectify.txt Sat Sep 1 10:45:04 2007
@@ -82,6 +82,13 @@
.. _`namespace specific classes`: element_classes.html#namespace-class-lookup
+To make the doctests in this document look a little nicer, we also use this:
+
+ >>> import lxml.usedoctest
+
+Imported from within a doctest, this relieves us from caring about the exact
+formatting of XML output.
+
The lxml.objectify API
======================
@@ -274,18 +281,18 @@
>>> E = objectify.E
>>> root = E.root(
- ... E.a(5),
+ ... E.a(5L),
... E.b(6.1),
... E.c(True),
... E.d("how", tell="me")
... )
>>> print etree.tostring(root, pretty_print=True)
-
- 5
- 6.1
- true
- how
+
+ 5
+ 6.1
+ true
+ how
This allows you to write up a specific language in tags::
@@ -300,9 +307,9 @@
... )
>>> print etree.tostring(root, pretty_print=True)
-
- The title
- 5
+
+ The title
+ 5
From scoder at codespeak.net Sat Sep 1 10:59:47 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Sat, 1 Sep 2007 10:59:47 +0200 (CEST)
Subject: [Lxml-checkins] r46232 - in lxml/trunk: . doc src/lxml
Message-ID: <20070901085947.AC74F81B0@code0.codespeak.net>
Author: scoder
Date: Sat Sep 1 10:59:45 2007
New Revision: 46232
Modified:
lxml/trunk/CHANGES.txt
lxml/trunk/doc/objectify.txt
lxml/trunk/src/lxml/objectify.pyx
Log:
made annotation in objectify.ElementMaker optional through 'annotate' kw arg
Modified: lxml/trunk/CHANGES.txt
==============================================================================
--- lxml/trunk/CHANGES.txt (original)
+++ lxml/trunk/CHANGES.txt Sat Sep 1 10:59:45 2007
@@ -8,6 +8,10 @@
Features added
--------------
+* Reimplemented ``objectify.E`` for better performance and improved
+ integration with objectify. Provides extended type support based on
+ registered PyTypes.
+
* XSLT objects now support deep copying
* New ``makeSubElement()`` C-API function that allows creating a new
@@ -35,8 +39,7 @@
* Schematron validation (incomplete in libxml2)
-* Extended type support for ``objectify.E`` based on registered PyTypes.
- Supports an additional argument to ``PyType()`` that takes a conversion
+* Additional ``stringify`` argument to ``PyType()`` that takes a conversion
function to strings to support setting text values from arbitrary types.
* Entity support through an ``Entity`` factory and element classes. XML
Modified: lxml/trunk/doc/objectify.txt
==============================================================================
--- lxml/trunk/doc/objectify.txt (original)
+++ lxml/trunk/doc/objectify.txt Sat Sep 1 10:59:45 2007
@@ -299,17 +299,32 @@
>>> ROOT = objectify.E.root
>>> TITLE = objectify.E.title
- >>> TYPE = objectify.E.type
+ >>> HOWMANY = getattr(objectify.E, "how-many")
>>> root = ROOT(
... TITLE("The title"),
- ... TYPE(5)
+ ... HOWMANY(5)
... )
>>> print etree.tostring(root, pretty_print=True)
The title
- 5
+ 5
+
+
+``objectify.E`` is an instance of ``objectify.ElementMaker``. By default, it
+creates pytype annotated Elements without a namespace. You can switch off the
+pytype annotation by passing False to the ``annotate`` keyword argument of the
+constructor. You can also pass a default namespace and an ``nsmap``::
+
+ >>> myE = objectify.ElementMaker(annotate=False,
+ ... namespace="http://my/ns", nsmap={None : "http://my/ns"})
+
+ >>> root = myE.root( myE.someint(2) )
+
+ >>> print etree.tostring(root, pretty_print=True)
+
+ 2
Modified: lxml/trunk/src/lxml/objectify.pyx
==============================================================================
--- lxml/trunk/src/lxml/objectify.pyx (original)
+++ lxml/trunk/src/lxml/objectify.pyx Sat Sep 1 10:59:45 2007
@@ -1047,7 +1047,9 @@
cdef object _makeelement
cdef object _namespace
cdef object _nsmap
- def __init__(self, namespace=None, nsmap=None, makeelement=None):
+ cdef int _annotate
+ def __init__(self, namespace=None, nsmap=None, annotate=True,
+ makeelement=None):
if nsmap is None:
nsmap = _DEFAULT_NSMAP
self._nsmap = nsmap
@@ -1055,6 +1057,7 @@
self._namespace = None
else:
self._namespace = "{%s}" % namespace
+ self._annotate = bool(annotate)
if makeelement is not None:
assert callable(makeelement)
self._makeelement = makeelement
@@ -1068,6 +1071,7 @@
element_maker = NEW_ELEMENT_MAKER(_ObjectifyElementMakerCaller)
element_maker._tag = tag
element_maker._nsmap = self._nsmap
+ element_maker._annotate = self._annotate
element_maker._element_factory = self._makeelement
return element_maker
@@ -1075,6 +1079,7 @@
cdef object _tag
cdef object _nsmap
cdef object _element_factory
+ cdef int _annotate
def __call__(self, *children, **attrib):
cdef _ObjectifyElementMakerCaller elementMaker
@@ -1088,6 +1093,7 @@
else:
element = self._element_factory(self._tag, attrib, self._nsmap)
+ pytype_name = None
has_children = 0
has_string_value = 0
for child in children:
@@ -1113,7 +1119,7 @@
has_children = 1
else:
if pytype_name is not None:
- # concatenation makes the result a string
+ # concatenation always makes the result a string
has_string_value = 1
pytype_name = _typename(child)
pytype = python.PyDict_GetItem(_PYTYPE_DICT, pytype_name)
@@ -1124,12 +1130,11 @@
child = str(child)
_add_text(element, child)
- if not has_children:
+ if self._annotate and not has_children:
if has_string_value:
cetree.setAttributeValue(element, PYTYPE_ATTRIBUTE, "str")
elif pytype_name is not None:
- cetree.setAttributeValue(element, PYTYPE_ATTRIBUTE,
- pytype_name)
+ cetree.setAttributeValue(element, PYTYPE_ATTRIBUTE, pytype_name)
return element
@@ -1911,6 +1916,10 @@
_parse = etree.parse
def parse(f, parser=None):
+ """Parse a file or file-like object with the objectify parser.
+
+ You can pass a different parser as second argument.
+ """
if parser is None:
parser = objectify_parser
return _parse(f, parser)
From scoder at codespeak.net Sun Sep 2 17:20:11 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Sun, 2 Sep 2007 17:20:11 +0200 (CEST)
Subject: [Lxml-checkins] r46239 - lxml/trunk
Message-ID: <20070902152011.513E78141@code0.codespeak.net>
Author: scoder
Date: Sun Sep 2 17:20:09 2007
New Revision: 46239
Modified:
lxml/trunk/setup.py
Log:
doc clarification
Modified: lxml/trunk/setup.py
==============================================================================
--- lxml/trunk/setup.py (original)
+++ lxml/trunk/setup.py Sun Sep 2 17:20:09 2007
@@ -71,10 +71,10 @@
Running ``easy_install lxml==dev`` will install it from
http://codespeak.net/svn/lxml/trunk#egg=lxml-dev
-Current bug fixes for the stable version are at
-http://codespeak.net/svn/lxml/branch/lxml-%(branch_version)s .
-Running ``easy_install lxml==%(branch_version)sbugfix`` will install this
-version from
+After an official release of a new stable series, current bug fixes might
+become available at
+http://codespeak.net/svn/lxml/branch/lxml-%(branch_version)s . Running
+``easy_install lxml==%(branch_version)sbugfix`` will install this version from
http://codespeak.net/svn/lxml/branch/lxml-%(branch_version)s#egg=lxml-%(branch_version)sbugfix
""" % { "branch_version" : versioninfo.branch_version() }) +
From scoder at codespeak.net Sun Sep 2 17:20:28 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Sun, 2 Sep 2007 17:20:28 +0200 (CEST)
Subject: [Lxml-checkins] r46240 - lxml/trunk/doc
Message-ID: <20070902152028.B66EE8141@code0.codespeak.net>
Author: scoder
Date: Sun Sep 2 17:20:28 2007
New Revision: 46240
Modified:
lxml/trunk/doc/FAQ.txt
Log:
ReST fix
Modified: lxml/trunk/doc/FAQ.txt
==============================================================================
--- lxml/trunk/doc/FAQ.txt (original)
+++ lxml/trunk/doc/FAQ.txt Sun Sep 2 17:20:28 2007
@@ -446,7 +446,7 @@
that problems become hard to debug and even harder to reproduce in a
predictable way. If you encounter crashes in one of these systems, but your code
runs perfectly when started by hand, the following gives you a few hints for
-possible approaches to solve your specific problem::
+possible approaches to solve your specific problem:
* make sure you use recent versions of libxml2, libxslt and lxml. The libxml2
developers keep fixing bugs in each release, and lxml also tries to become
From scoder at codespeak.net Sun Sep 2 18:13:31 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Sun, 2 Sep 2007 18:13:31 +0200 (CEST)
Subject: [Lxml-checkins] r46241 - in lxml/trunk: . doc
Message-ID: <20070902161331.7F1F680B8@code0.codespeak.net>
Author: scoder
Date: Sun Sep 2 18:13:29 2007
New Revision: 46241
Modified:
lxml/trunk/CHANGES.txt
lxml/trunk/doc/lxml2.txt
lxml/trunk/doc/main.txt
Log:
pre-release cleanup
Modified: lxml/trunk/CHANGES.txt
==============================================================================
--- lxml/trunk/CHANGES.txt (original)
+++ lxml/trunk/CHANGES.txt Sun Sep 2 18:13:29 2007
@@ -2,7 +2,7 @@
lxml changelog
==============
-2.0alpha1 (2007-08-31)
+2.0alpha1 (2007-09-02)
======================
Features added
@@ -39,8 +39,9 @@
* Schematron validation (incomplete in libxml2)
-* Additional ``stringify`` argument to ``PyType()`` that takes a conversion
- function to strings to support setting text values from arbitrary types.
+* Additional ``stringify`` argument to ``objectify.PyType()`` takes a
+ conversion function to strings to support setting text values from arbitrary
+ types.
* Entity support through an ``Entity`` factory and element classes. XML
parsers now have a ``resolve_entities`` keyword argument that can be set to
Modified: lxml/trunk/doc/lxml2.txt
==============================================================================
--- lxml/trunk/doc/lxml2.txt (original)
+++ lxml/trunk/doc/lxml2.txt Sun Sep 2 18:13:29 2007
@@ -89,7 +89,7 @@
facilitate further enhancements and an improved integration between lxml's
features.
-* lxml.objectify now has its own implementation of the ``E factory``. It uses
+* lxml.objectify now has its own implementation of the `E factory`_. It uses
the built-in type lookup mechanism of lxml.objectify, thus removing the need
for an additional type registry mechanism (as previously available through
the ``typemap`` parameter).
@@ -104,6 +104,8 @@
bigger overlap with the XSLT code. The main benefits are improved thread
safety in the XPath evaluators and Python RegExp support in standard XPath.
+.. _`E factory`: objectify.html#tree-generation-with-the-e-factory
+
New modules
===========
Modified: lxml/trunk/doc/main.txt
==============================================================================
--- lxml/trunk/doc/main.txt (original)
+++ lxml/trunk/doc/main.txt Sun Sep 2 18:13:29 2007
@@ -138,7 +138,7 @@
.. _`lxml at the Python Package Index`: http://pypi.python.org/pypi/lxml/
.. _`this key`: pubkey.asc
-The latest version is `lxml 2.0alpha1`_, released 2007-08-31
+The latest version is `lxml 2.0alpha1`_, released 2007-09-02
(`changes for 2.0alpha1`_). `Older versions`_ are listed below.
.. _`Older versions`: #old-versions
From scoder at codespeak.net Sun Sep 2 18:34:07 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Sun, 2 Sep 2007 18:34:07 +0200 (CEST)
Subject: [Lxml-checkins] r46242 - lxml/trunk/doc
Message-ID: <20070902163407.0575D817B@code0.codespeak.net>
Author: scoder
Date: Sun Sep 2 18:34:07 2007
New Revision: 46242
Modified:
lxml/trunk/doc/lxml2.txt
Log:
typo
Modified: lxml/trunk/doc/lxml2.txt
==============================================================================
--- lxml/trunk/doc/lxml2.txt (original)
+++ lxml/trunk/doc/lxml2.txt Sun Sep 2 18:34:07 2007
@@ -67,7 +67,7 @@
* The type annotations in lxml.objectify (the ``pytype`` attribute) now use
``NoneType`` for the None value as this is the correct Python type name.
- Previously, lxml 1.x used a lower case ``?one``.
+ Previously, lxml 1.x used a lower case ``none``.
* Another change in objectify regards the way it deals with ambiguous types.
Previously, setting a value like the string ``"3"`` through normal attribute
From scoder at codespeak.net Mon Sep 3 11:57:29 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Mon, 3 Sep 2007 11:57:29 +0200 (CEST)
Subject: [Lxml-checkins] r46262 - lxml/trunk/src/lxml
Message-ID: <20070903095729.8D68C814C@code0.codespeak.net>
Author: scoder
Date: Mon Sep 3 11:57:27 2007
New Revision: 46262
Modified:
lxml/trunk/src/lxml/etree.pyx
Log:
use list instead of dict in _TempStore to reduce overhead
Modified: lxml/trunk/src/lxml/etree.pyx
==============================================================================
--- lxml/trunk/src/lxml/etree.pyx (original)
+++ lxml/trunk/src/lxml/etree.pyx Mon Sep 3 11:57:27 2007
@@ -172,16 +172,13 @@
cdef class _TempStore:
cdef object _storage
def __init__(self):
- self._storage = {}
+ self._storage = []
cdef void add(self, obj):
- python.PyDict_SetItem(self._storage, id(obj), obj)
+ python.PyList_Append(self._storage, obj)
cdef void clear(self):
- python.PyDict_Clear(self._storage)
-
- cdef object dictcopy(self):
- return self._storage.copy()
+ del self._storage[:]
# class for temporarily storing exceptions raised in extensions
cdef class _ExceptionContext:
From scoder at codespeak.net Mon Sep 3 12:35:21 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Mon, 3 Sep 2007 12:35:21 +0200 (CEST)
Subject: [Lxml-checkins] r46264 - lxml/trunk/src/lxml
Message-ID: <20070903103521.65813814F@code0.codespeak.net>
Author: scoder
Date: Mon Sep 3 12:35:20 2007
New Revision: 46264
Modified:
lxml/trunk/src/lxml/xslt.pxi
Log:
use separate resolver contexts for each XSLT call: exceptions and resolver temp storage must be local
Modified: lxml/trunk/src/lxml/xslt.pxi
==============================================================================
--- lxml/trunk/src/lxml/xslt.pxi (original)
+++ lxml/trunk/src/lxml/xslt.pxi Mon Sep 3 12:35:20 2007
@@ -58,7 +58,7 @@
cdef _XSLTResolverContext _copy(self):
cdef _XSLTResolverContext context
context = _XSLTResolverContext(self._parser)
- context._c_style_doc = _copyDoc(self._c_style_doc, 1)
+ context._c_style_doc = self._c_style_doc
return context
cdef xmlDoc* _xslt_resolve_stylesheet(char* c_uri, void* context):
@@ -353,7 +353,10 @@
new_xslt._access_control = self._access_control
new_xslt._error_log = _ErrorLog()
new_xslt._context = self._context._copy()
+
new_xslt._xslt_resolver_context = self._xslt_resolver_context._copy()
+ new_xslt._xslt_resolver_context._c_style_doc = _copyDoc(
+ self._xslt_resolver_context._c_style_doc, 1)
c_doc = _copyDoc(self._c_style.doc, 1)
new_xslt._c_style = xslt.xsltParseStylesheetDoc(c_doc)
@@ -365,6 +368,7 @@
def __call__(self, _input, profile_run=False, **_kw):
cdef _XSLTContext context
+ cdef _XSLTResolverContext resolver_context
cdef _Document input_doc
cdef _Element root_node
cdef _Document result_doc
@@ -397,6 +401,9 @@
context = self._context._copy()
context.register_context(transform_ctxt, input_doc)
+ resolver_context = self._xslt_resolver_context._copy()
+ transform_ctxt._private = resolver_context
+
c_result = self._run_transform(
input_doc, c_doc, _kw, context, transform_ctxt)
@@ -412,10 +419,10 @@
self._error_log.disconnect()
try:
- if self._xslt_resolver_context._has_raised():
+ if resolver_context is not None and resolver_context._has_raised():
if c_result is not NULL:
tree.xmlFreeDoc(c_result)
- self._xslt_resolver_context._raise_if_stored()
+ resolver_context._raise_if_stored()
if c_result is NULL:
# last error seems to be the most accurate here
@@ -431,31 +438,26 @@
message = "Error applying stylesheet"
raise XSLTApplyError, message
finally:
- self._xslt_resolver_context.clear()
+ if resolver_context is not None:
+ resolver_context.clear()
result_doc = _documentFactory(c_result, input_doc._parser)
return _xsltResultTreeFactory(result_doc, self, profile_doc)
cdef xmlDoc* _run_transform(self, _Document input_doc, xmlDoc* c_input_doc,
- parameters, _XSLTContext context,
- xslt.xsltTransformContext* transform_ctxt):
+ parameters, _XSLTContext context,
+ xslt.xsltTransformContext* transform_ctxt):
cdef python.PyThreadState* state
- cdef _XSLTResolverContext resolver_context
cdef xmlDoc* c_result
cdef char** params
cdef Py_ssize_t i, parameter_count
- resolver_context = _XSLTResolverContext(input_doc._parser)
- resolver_context._c_style_doc = self._xslt_resolver_context._c_style_doc
-
xslt.xsltSetTransformErrorFunc(transform_ctxt, self._error_log,
_receiveXSLTError)
if self._access_control is not None:
self._access_control._register_in_context(transform_ctxt)
- transform_ctxt._private = self._xslt_resolver_context
-
parameter_count = python.PyDict_Size(parameters)
if parameter_count > 0:
# allocate space for parameters
@@ -463,17 +465,21 @@
# and + 1 as array is NULL terminated
params = python.PyMem_Malloc(
sizeof(char*) * (parameter_count * 2 + 1))
- i = 0
- keep_ref = []
- for key, value in parameters.iteritems():
- k = _utf8(key)
- python.PyList_Append(keep_ref, k)
- v = _utf8(value)
- python.PyList_Append(keep_ref, v)
- params[i] = _cstr(k)
- i = i + 1
- params[i] = _cstr(v)
- i = i + 1
+ try:
+ i = 0
+ keep_ref = []
+ for key, value in parameters.iteritems():
+ k = _utf8(key)
+ python.PyList_Append(keep_ref, k)
+ v = _utf8(value)
+ python.PyList_Append(keep_ref, v)
+ params[i] = _cstr(k)
+ i = i + 1
+ params[i] = _cstr(v)
+ i = i + 1
+ except:
+ python.PyMem_Free(params)
+ raise
params[i] = NULL
else:
params = NULL
From scoder at codespeak.net Mon Sep 3 12:35:45 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Mon, 3 Sep 2007 12:35:45 +0200 (CEST)
Subject: [Lxml-checkins] r46265 - lxml/trunk
Message-ID: <20070903103545.B54D7814F@code0.codespeak.net>
Author: scoder
Date: Mon Sep 3 12:35:45 2007
New Revision: 46265
Modified:
lxml/trunk/CHANGES.txt
Log:
changelog update
Modified: lxml/trunk/CHANGES.txt
==============================================================================
--- lxml/trunk/CHANGES.txt (original)
+++ lxml/trunk/CHANGES.txt Mon Sep 3 12:35:45 2007
@@ -2,6 +2,22 @@
lxml changelog
==============
+Under development
+=================
+
+Features added
+--------------
+
+Bugs fixed
+----------
+
+* Race condition in XSLT where the resolver context leaked between concurrent
+ XSLT calls
+
+Other changes
+-------------
+
+
2.0alpha1 (2007-09-02)
======================
From scoder at codespeak.net Mon Sep 3 13:36:00 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Mon, 3 Sep 2007 13:36:00 +0200 (CEST)
Subject: [Lxml-checkins] r46267 - lxml/trunk/src/lxml
Message-ID: <20070903113600.3CF5A814D@code0.codespeak.net>
Author: scoder
Date: Mon Sep 3 13:35:58 2007
New Revision: 46267
Modified:
lxml/trunk/src/lxml/parser.pxi
Log:
ET 1.3 compatible parser version
Modified: lxml/trunk/src/lxml/parser.pxi
==============================================================================
--- lxml/trunk/src/lxml/parser.pxi (original)
+++ lxml/trunk/src/lxml/parser.pxi Mon Sep 3 13:35:58 2007
@@ -484,6 +484,11 @@
return _makeElement(_tag, NULL, None, self, None, None,
attrib, nsmap, _extra)
+ property version:
+ "The version of the underlying XML parser."
+ def __get__(self):
+ return "libxml2 %d.%d.%d" % LIBXML_VERSION
+
cdef xmlDoc* _parseUnicodeDoc(self, utext, char* c_filename) except NULL:
"""Parse unicode document, share dictionary if possible.
"""
From scoder at codespeak.net Mon Sep 3 13:43:01 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Mon, 3 Sep 2007 13:43:01 +0200 (CEST)
Subject: [Lxml-checkins] r46269 - lxml/trunk/src/lxml/tests
Message-ID: <20070903114301.85EC280E9@code0.codespeak.net>
Author: scoder
Date: Mon Sep 3 13:43:00 2007
New Revision: 46269
Modified:
lxml/trunk/src/lxml/tests/common_imports.py
Log:
additional test import of cElementTree
Modified: lxml/trunk/src/lxml/tests/common_imports.py
==============================================================================
--- lxml/trunk/src/lxml/tests/common_imports.py (original)
+++ lxml/trunk/src/lxml/tests/common_imports.py Mon Sep 3 13:43:00 2007
@@ -14,6 +14,14 @@
ElementTree = None
try:
+ from xml.etree import cElementTree # Python 2.5
+except ImportError:
+ try:
+ from celementtree import cElementTree # standard ET
+ except ImportError:
+ cElementTree = None
+
+try:
import doctest
# check if the system version has everything we need
doctest.DocFileSuite
From scoder at codespeak.net Mon Sep 3 13:54:40 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Mon, 3 Sep 2007 13:54:40 +0200 (CEST)
Subject: [Lxml-checkins] r46271 - lxml/trunk/src/lxml/tests
Message-ID: <20070903115440.9B14B814E@code0.codespeak.net>
Author: scoder
Date: Mon Sep 3 13:54:37 2007
New Revision: 46271
Modified:
lxml/trunk/src/lxml/tests/common_imports.py
Log:
fix import
Modified: lxml/trunk/src/lxml/tests/common_imports.py
==============================================================================
--- lxml/trunk/src/lxml/tests/common_imports.py (original)
+++ lxml/trunk/src/lxml/tests/common_imports.py Mon Sep 3 13:54:37 2007
@@ -6,7 +6,7 @@
from lxml import etree
try:
- from xml.etree import ElementTree # Python 2.5
+ from xml.etree import ElementTree # Python 2.5+
except ImportError:
try:
from elementtree import ElementTree # standard ET
@@ -14,10 +14,10 @@
ElementTree = None
try:
- from xml.etree import cElementTree # Python 2.5
+ from xml.etree import cElementTree # Python 2.5+
except ImportError:
try:
- from celementtree import cElementTree # standard ET
+ import cElementTree # standard ET
except ImportError:
cElementTree = None
From scoder at codespeak.net Mon Sep 3 13:55:11 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Mon, 3 Sep 2007 13:55:11 +0200 (CEST)
Subject: [Lxml-checkins] r46272 - lxml/trunk/src/lxml/tests
Message-ID: <20070903115511.4FCBA814E@code0.codespeak.net>
Author: scoder
Date: Mon Sep 3 13:55:10 2007
New Revision: 46272
Modified:
lxml/trunk/src/lxml/tests/test_elementtree.py
Log:
cleanup of test_elementtree.py to integrate cElementTree
Modified: lxml/trunk/src/lxml/tests/test_elementtree.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_elementtree.py (original)
+++ lxml/trunk/src/lxml/tests/test_elementtree.py Mon Sep 3 13:55:10 2007
@@ -9,12 +9,17 @@
"""
import unittest, doctest
-import os, re, shutil, tempfile, copy
+import os, re, shutil, tempfile, copy, operator
-from common_imports import StringIO, etree, ElementTree
-from common_imports import HelperTestCase, fileInTestDir, canonicalize
+from common_imports import StringIO, etree, ElementTree, cElementTree
+from common_imports import fileInTestDir, canonicalize
-class ETreeTestCaseBase(HelperTestCase):
+if cElementTree is not None:
+ if tuple([int(n) for n in
+ getattr(cElementTree, "VERSION", "0.0").split(".")]) <= (1,0,6):
+ cElementTree = None
+
+class ETreeTestCaseBase(unittest.TestCase):
etree = None
def setUp(self):
@@ -75,7 +80,7 @@
self.assertEquals('one', root[0].tag)
self.assertEquals('two', root[1].tag)
self.assertEquals('three', root[2].tag)
- self.assertRaises(IndexError, root.__getitem__, 3)
+ self.assertRaises(IndexError, operator.getitem, root, 3)
def test_subelement(self):
Element = self.etree.Element
@@ -116,7 +121,7 @@
root = doc.getroot()
self.assertEquals(1, len(root))
self.assertEquals('one', root[0].tag)
- self.assertRaises(IndexError, root.__getitem__, 1)
+ self.assertRaises(IndexError, operator.getitem, root, 1)
def test_element_indexing_with_text2(self):
ElementTree = self.etree.ElementTree
@@ -147,7 +152,7 @@
self.assertEquals(d, a[-1])
self.assertEquals(c, a[-2])
self.assertEquals(b, a[-3])
- self.assertRaises(IndexError, a.__getitem__, -4)
+ self.assertRaises(IndexError, operator.getitem, a, -4)
a[-1] = e = Element('e')
self.assertEquals(e, a[-1])
del a[-1]
@@ -266,7 +271,7 @@
root = doc.getroot()
self.assertEquals('One', root.attrib['one'])
self.assertEquals('Two', root.attrib['two'])
- self.assertRaises(KeyError, root.attrib.__getitem__, 'three')
+ self.assertRaises(KeyError, operator.getitem, root.attrib, 'three')
def test_attributes2(self):
ElementTree = self.etree.ElementTree
@@ -917,6 +922,18 @@
self.assertXML("",
a)
+ def test_processinginstruction(self):
+ # lxml.etree separates target and text
+ Element = self.etree.Element
+ SubElement = self.etree.SubElement
+ ProcessingInstruction = self.etree.PI
+
+ a = Element('a')
+ a.append(ProcessingInstruction('foo', 'some more text'))
+ self.assertEquals(a[0].tag, ProcessingInstruction)
+ self.assertXML("",
+ a)
+
def test_pi_nonsense(self):
ProcessingInstruction = self.etree.ProcessingInstruction
pi = ProcessingInstruction('foo')
@@ -980,7 +997,7 @@
a = Element('a')
b = SubElement(a, 'b')
- self.assertRaises(IndexError, a.__setitem__, 1, Element('c'))
+ self.assertRaises(IndexError, operator.setitem, a, 1, Element('c'))
def test_setitem_tail(self):
Element = self.etree.Element
@@ -1583,7 +1600,7 @@
a.attrib['bar'] = 'Bar'
self.assertEquals('Foo', a.attrib['foo'])
del a.attrib['foo']
- self.assertRaises(KeyError, a.attrib.__getitem__, 'foo')
+ self.assertRaises(KeyError, operator.getitem, a.attrib, 'foo')
def test_getslice(self):
Element = self.etree.Element
@@ -2514,6 +2531,15 @@
self.assertEquals(qname1, qname1)
self.assertEquals(qname1, qname2)
+ def test_parser_version(self):
+ etree = self.etree
+ parser = etree.XMLParser()
+ if hasattr(parser, "version"):
+ # ElementTree 1.3+, cET
+ self.assert_(re.match("[^ ]+ [0-9.]+", parser.version))
+
+ # helper methods
+
def _writeElement(self, element, encoding='us-ascii'):
"""Write out element for comparison.
"""
@@ -2592,19 +2618,33 @@
mapping["key"] = "value"
self.assertEquals("value", mapping["key"])
+ # assertFalse doesn't exist in Python 2.3
+ try:
+ unittest.TestCase.assertFalse
+ except AttributeError:
+ assertFalse = unittest.TestCase.failIf
-class ETreeTestCase(ETreeTestCaseBase):
- etree = etree
+
+if etree:
+ class ETreeTestCase(ETreeTestCaseBase):
+ etree = etree
if ElementTree:
class ElementTreeTestCase(ETreeTestCaseBase):
etree = ElementTree
+if cElementTree:
+ class CElementTreeTestCase(ETreeTestCaseBase):
+ etree = cElementTree
+
def test_suite():
suite = unittest.TestSuite()
- suite.addTests([unittest.makeSuite(ETreeTestCase)])
+ if etree:
+ suite.addTests([unittest.makeSuite(ETreeTestCase)])
if ElementTree:
suite.addTests([unittest.makeSuite(ElementTreeTestCase)])
+ if cElementTree:
+ suite.addTests([unittest.makeSuite(CElementTreeTestCase)])
return suite
if __name__ == '__main__':
From scoder at codespeak.net Mon Sep 3 16:02:37 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Mon, 3 Sep 2007 16:02:37 +0200 (CEST)
Subject: [Lxml-checkins] r46276 - lxml/trunk/src/lxml/tests
Message-ID: <20070903140237.7C45C8144@code0.codespeak.net>
Author: scoder
Date: Mon Sep 3 16:02:36 2007
New Revision: 46276
Modified:
lxml/trunk/src/lxml/tests/common_imports.py
lxml/trunk/src/lxml/tests/test_objectify.py
lxml/trunk/src/lxml/tests/test_pyclasslookup.py
Log:
more test case cleanup
Modified: lxml/trunk/src/lxml/tests/common_imports.py
==============================================================================
--- lxml/trunk/src/lxml/tests/common_imports.py (original)
+++ lxml/trunk/src/lxml/tests/common_imports.py Mon Sep 3 16:02:36 2007
@@ -31,12 +31,6 @@
# we need our own version to make it work (Python 2.3?)
import local_doctest as doctest
-try:
- from operator import itemgetter
-except ImportError:
- def itemgetter(item):
- return lambda obj: obj[item]
-
class HelperTestCase(unittest.TestCase):
def parse(self, text):
f = StringIO(text)
Modified: lxml/trunk/src/lxml/tests/test_objectify.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_objectify.py (original)
+++ lxml/trunk/src/lxml/tests/test_objectify.py Mon Sep 3 16:02:36 2007
@@ -9,7 +9,6 @@
from common_imports import etree, StringIO, HelperTestCase, fileInTestDir
from common_imports import SillyFileLike, canonicalize, doctest
-from common_imports import itemgetter
from lxml import objectify
@@ -373,7 +372,7 @@
self.assertEquals("0", root.c1.c2[0].text)
self.assertEquals("1", root.c1.c2[1].text)
self.assertEquals("2", root.c1.c2[2].text)
- self.assertRaises(IndexError, itemgetter(3), root.c1.c2)
+ self.assertRaises(IndexError, operator.getitem, root.c1.c2, 3)
def test_child_index_neg(self):
root = self.XML(xml_str)
@@ -381,7 +380,7 @@
self.assertEquals("0", root.c1.c2[-3].text)
self.assertEquals("1", root.c1.c2[-2].text)
self.assertEquals("2", root.c1.c2[-1].text)
- self.assertRaises(IndexError, itemgetter(-4), root.c1.c2)
+ self.assertRaises(IndexError, operator.getitem, root.c1.c2, -4)
def test_child_len(self):
root = self.XML(xml_str)
Modified: lxml/trunk/src/lxml/tests/test_pyclasslookup.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_pyclasslookup.py (original)
+++ lxml/trunk/src/lxml/tests/test_pyclasslookup.py Mon Sep 3 16:02:36 2007
@@ -9,7 +9,6 @@
from common_imports import etree, StringIO, HelperTestCase, fileInTestDir
from common_imports import SillyFileLike, canonicalize, doctest
-from common_imports import itemgetter
from lxml.pyclasslookup import PythonElementClassLookup
From scoder at codespeak.net Mon Sep 3 19:32:24 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Mon, 3 Sep 2007 19:32:24 +0200 (CEST)
Subject: [Lxml-checkins] r46292 - lxml/trunk/src/lxml/tests
Message-ID: <20070903173224.5B117816C@code0.codespeak.net>
Author: scoder
Date: Mon Sep 3 19:32:23 2007
New Revision: 46292
Modified:
lxml/trunk/src/lxml/tests/test_elementtree.py
Log:
API fix in ET tests
Modified: lxml/trunk/src/lxml/tests/test_elementtree.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_elementtree.py (original)
+++ lxml/trunk/src/lxml/tests/test_elementtree.py Mon Sep 3 19:32:23 2007
@@ -687,7 +687,7 @@
SubElement = self.etree.SubElement
el = Element('tag')
- SubElement(el, 'foo', attrib={'foo':'Foo'}, baz="Baz")
+ SubElement(el, 'foo', {'foo':'Foo'}, baz="Baz")
self.assertEquals("Baz", el[0].attrib['baz'])
self.assertEquals('Foo', el[0].attrib['foo'])
From scoder at codespeak.net Tue Sep 4 08:46:30 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Tue, 4 Sep 2007 08:46:30 +0200 (CEST)
Subject: [Lxml-checkins] r46296 - lxml/trunk
Message-ID: <20070904064630.30E8B8173@code0.codespeak.net>
Author: scoder
Date: Tue Sep 4 08:46:27 2007
New Revision: 46296
Modified:
lxml/trunk/version.txt
Log:
set version to 2.0alpha2
Modified: lxml/trunk/version.txt
==============================================================================
--- lxml/trunk/version.txt (original)
+++ lxml/trunk/version.txt Tue Sep 4 08:46:27 2007
@@ -1 +1 @@
-2.0alpha1
+2.0alpha2
From scoder at codespeak.net Tue Sep 4 08:47:00 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Tue, 4 Sep 2007 08:47:00 +0200 (CEST)
Subject: [Lxml-checkins] r46297 - in lxml/trunk: . src/lxml src/lxml/tests
Message-ID: <20070904064700.AB2EE8173@code0.codespeak.net>
Author: scoder
Date: Tue Sep 4 08:47:00 2007
New Revision: 46297
Modified:
lxml/trunk/CHANGES.txt
lxml/trunk/src/lxml/parser.pxi
lxml/trunk/src/lxml/tests/test_elementtree.py
Log:
ET-like feed parser interface
Modified: lxml/trunk/CHANGES.txt
==============================================================================
--- lxml/trunk/CHANGES.txt (original)
+++ lxml/trunk/CHANGES.txt Tue Sep 4 08:47:00 2007
@@ -8,6 +8,9 @@
Features added
--------------
+* ElementTree-like feed parser interface on XMLParser and HTMLParser
+ (``feed()`` and ``close()`` methods)
+
Bugs fixed
----------
Modified: lxml/trunk/src/lxml/parser.pxi
==============================================================================
--- lxml/trunk/src/lxml/parser.pxi (original)
+++ lxml/trunk/src/lxml/parser.pxi Tue Sep 4 08:47:00 2007
@@ -4,7 +4,14 @@
cimport htmlparser
from xmlparser cimport xmlParserCtxt, xmlDict
-class XMLSyntaxError(LxmlSyntaxError):
+class ParseError(LxmlSyntaxError):
+ """Syntax error while parsing an XML document.
+
+ For compatibility with ElementTree 1.3 and later.
+ """
+ pass
+
+class XMLSyntaxError(ParseError):
"""Syntax error while parsing an XML document.
"""
pass
@@ -381,6 +388,7 @@
cdef xmlParserCtxt* _parser_ctxt
cdef ElementClassLookup _class_lookup
cdef python.PyThread_type_lock _parser_lock
+ cdef int _feed_parser_running
def __init__(self, int parse_options, remove_comments, remove_pis,
context_class=_ResolverContext):
@@ -489,6 +497,113 @@
def __get__(self):
return "libxml2 %d.%d.%d" % LIBXML_VERSION
+ # feed parser interface
+
+ def feed(self, data):
+ """Feeds data to the parser. The argument should be an 8-bit string
+ buffer containing encoded data, although Unicode is supported as long
+ as both string types are not mixed.
+
+ This is the main entry point to the consumer interface of a parser.
+ The parser will parse as much of the XML stream as it can on each
+ call. To finish parsing, call the ``close()`` method.
+
+ It is not possible to use the parser in any other way after calling
+ the ``feed()`` method. The parser can only be reset by calling
+ ``close()``.
+ """
+ cdef xmlParserCtxt* pctxt
+ cdef Py_ssize_t py_buffer_len
+ cdef char* c_data
+ cdef char* c_encoding
+ cdef int buffer_len
+ cdef int error
+ cdef int recover
+ if python.PyString_Check(data):
+ c_encoding = NULL
+ c_data = _cstr(data)
+ py_buffer_len = python.PyString_GET_SIZE(data)
+ elif python.PyUnicode_Check(data):
+ if _UNICODE_ENCODING is NULL:
+ raise ParserError, \
+ "Unicode parsing is not supported on this platform"
+ c_encoding = _UNICODE_ENCODING
+ c_data = python.PyUnicode_AS_DATA(data)
+ py_buffer_len = python.PyUnicode_GET_DATA_SIZE(data)
+ else:
+ raise TypeError, "Parsing requires string data"
+
+ if py_buffer_len > python.INT_MAX:
+ buffer_len = python.INT_MAX
+ else:
+ buffer_len = py_buffer_len
+
+ pctxt = self._parser_ctxt
+ error = 0
+ if not self._feed_parser_running:
+ self._lockParser()
+ self._feed_parser_running = 1
+ self._error_log.connect()
+ __GLOBAL_PARSER_CONTEXT.initParserDict(pctxt)
+ xmlparser.xmlCtxtUseOptions(pctxt, self._parse_options)
+ error = xmlparser.xmlCtxtResetPush(
+ pctxt, c_data, buffer_len, NULL, c_encoding)
+ py_buffer_len = py_buffer_len - buffer_len
+
+ while error == 0 and py_buffer_len > 0:
+ c_data = c_data + buffer_len
+ if py_buffer_len > python.INT_MAX:
+ buffer_len = python.INT_MAX
+ else:
+ buffer_len = py_buffer_len
+ py_buffer_len = py_buffer_len - buffer_len
+ error = xmlparser.xmlParseChunk(pctxt, c_data, buffer_len, 0)
+
+ if error:
+ self._feed_parser_running = 0
+ try:
+ recover = self._parse_options & xmlparser.XML_PARSE_RECOVER
+ _handleParseResult(pctxt, pctxt.myDoc, None,
+ self._error_log, recover)
+ finally:
+ self._cleanup()
+ self._context.clear()
+ self._error_log.disconnect()
+ self._unlockParser()
+
+ def close(self):
+ """Finishes feeding of data to this parser. This tells the parser to
+ process any remaining data in the feed buffer, and then returns the
+ root Element of the tree that was parsed.
+
+ This method must be called after passing the last chunk of data into
+ the ``feed()`` method. It should only be called when using the feed
+ parser interface is used, all other usage is undefined.
+ """
+ cdef xmlParserCtxt* pctxt
+ cdef xmlDoc* c_doc
+ cdef _Document doc
+ cdef int error
+ if not self._feed_parser_running:
+ raise XMLSyntaxError, "no element found"
+ pctxt = self._parser_ctxt
+ self._feed_parser_running = 0
+ error = xmlparser.xmlParseChunk(pctxt, NULL, 0, 1)
+ try:
+ recover = self._parse_options & xmlparser.XML_PARSE_RECOVER
+ c_doc = _handleParseResult(pctxt, pctxt.myDoc, None,
+ self._error_log, recover)
+ finally:
+ self._cleanup()
+ self._context.clear()
+ self._error_log.disconnect()
+ self._unlockParser()
+
+ doc = _documentFactory(c_doc, self)
+ return doc.getroot()
+
+ # internal parser methods
+
cdef xmlDoc* _parseUnicodeDoc(self, utext, char* c_filename) except NULL:
"""Parse unicode document, share dictionary if possible.
"""
Modified: lxml/trunk/src/lxml/tests/test_elementtree.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_elementtree.py (original)
+++ lxml/trunk/src/lxml/tests/test_elementtree.py Tue Sep 4 08:47:00 2007
@@ -2538,6 +2538,34 @@
# ElementTree 1.3+, cET
self.assert_(re.match("[^ ]+ [0-9.]+", parser.version))
+ def test_feed_parser(self):
+ parser = self.etree.XMLParser()
+
+ parser.feed('<')
+ parser.feed('a test="works"/')
+ parser.feed('>')
+
+ root = parser.close()
+
+ self.assertEquals(root.tag, "root")
+ self.assertEquals(root[0].tag, "a")
+ self.assertEquals(root[0].get("test"), "works")
+
+ def test_feed_parser_error_close_empty(self):
+ parser = self.etree.XMLParser()
+ self.assertRaises(Exception, parser.close)
+
+ def test_feed_parser_error_close_incomplete(self):
+ parser = self.etree.XMLParser()
+
+ parser.feed('
An HTML attachment was scrubbed...
URL: http://codespeak.net/pipermail/lxml-checkins/attachments/20070904/d0e19050/attachment.htm
From scoder at codespeak.net Tue Sep 4 09:22:22 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Tue, 4 Sep 2007 09:22:22 +0200 (CEST)
Subject: [Lxml-checkins] r46298 - in lxml/trunk: doc src/lxml
Message-ID: <20070904072222.0E095817E@code0.codespeak.net>
Author: scoder
Date: Tue Sep 4 09:22:21 2007
New Revision: 46298
Modified:
lxml/trunk/doc/parsing.txt
lxml/trunk/src/lxml/parser.pxi
Log:
doc update on the feed parser
Modified: lxml/trunk/doc/parsing.txt
==============================================================================
--- lxml/trunk/doc/parsing.txt (original)
+++ lxml/trunk/doc/parsing.txt Tue Sep 4 09:22:21 2007
@@ -9,8 +9,17 @@
.. contents::
..
1 Parsers
- 2 iterparse and iterwalk
- 3 Python unicode strings
+ 1.1 Parser options
+ 1.2 Parsing HTML
+ 1.3 Doctype information
+ 2 The feed parser interface
+ 3 iterparse and iterwalk
+ 3.1 Selective tag events
+ 3.2 Modifying the tree
+ 3.3 iterwalk
+ 4 Python unicode strings
+ 4.1 Serialising to Unicode strings
+
The usual setup procedure::
@@ -167,6 +176,45 @@
ascii
+The feed parser interface
+=========================
+
+Since lxml 2.0, the parsers have a feed parser interface that is compatible with
+the `ElementTree parsers`_. You can use it to feed data into the parser in a
+controlled step-by-step way. Note that you can only use one interface at a
+time: the ``parse()`` or ``XML()`` functions, or the feed parser interface.
+
+.. _`ElementTree parsers`: http://effbot.org/elementtree/elementtree-xmlparser.htm
+
+To start parsing with a feed parser, just call its ``feed()`` method::
+
+ >>> parser = etree.XMLParser()
+
+ >>> for data in ('<?xml versio', 'n="1.0"?', '><roo', 't><a', '/></root>'):
+ ... parser.feed(data)
+
+When you are done parsing, you **must** call the ``close()`` method to
+retrieve the root Element of the parse result document, and to unlock the
+parser::
+
+ >>> root = parser.close()
+
+ >>> print root.tag
+ root
+ >>> print root[0].tag
+ a
+
+If you do not call ``close()``, the parser will stay locked and subsequent
+usages will block until the end of time. So make sure you also close it in
+the exception case.
+
+Another way of achieving the same step-by-step parsing is by writing your own
+file-like object that returns a chunk of data on each ``read()`` call. Where
+the feed parser interface allows you to actively pass data chunks into the
+parser, a file-like object passively responds to ``read()`` requests of the
+parser itself. Depending on the data source, either way may be more natural.
+
+
iterparse and iterwalk
======================
Modified: lxml/trunk/src/lxml/parser.pxi
==============================================================================
--- lxml/trunk/src/lxml/parser.pxi (original)
+++ lxml/trunk/src/lxml/parser.pxi Tue Sep 4 09:22:21 2007
@@ -578,7 +578,7 @@
This method must be called after passing the last chunk of data into
the ``feed()`` method. It should only be called when using the feed
- parser interface is used, all other usage is undefined.
+ parser interface, all other usage is undefined.
"""
cdef xmlParserCtxt* pctxt
cdef xmlDoc* c_doc
From scoder at codespeak.net Tue Sep 4 09:57:20 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Tue, 4 Sep 2007 09:57:20 +0200 (CEST)
Subject: [Lxml-checkins] r46299 - in lxml/trunk: . src/lxml
Message-ID: <20070904075720.E19E2812C@code0.codespeak.net>
Author: scoder
Date: Tue Sep 4 09:57:19 2007
New Revision: 46299
Modified:
lxml/trunk/Makefile
lxml/trunk/src/lxml/classlookup.pxi
lxml/trunk/src/lxml/objectify.pyx
lxml/trunk/src/lxml/sax.py
Log:
docstring cleanup
Modified: lxml/trunk/Makefile
==============================================================================
--- lxml/trunk/Makefile (original)
+++ lxml/trunk/Makefile Tue Sep 4 09:57:19 2007
@@ -39,7 +39,7 @@
rm -fr doc/html/api
@[ -x "`which epydoc`" ] \
&& (cd src && echo "Generating API docs ..." && \
- PYTHONPATH=. epydoc -o ../doc/html/api --name lxml --url http://codespeak.net/lxml/ lxml/) \
+ PYTHONPATH=. epydoc -v -o ../doc/html/api --name lxml --url http://codespeak.net/lxml/ lxml/) \
|| (echo "not generating epydoc API documentation")
# XXX What should the default be?
Modified: lxml/trunk/src/lxml/classlookup.pxi
==============================================================================
--- lxml/trunk/src/lxml/classlookup.pxi (original)
+++ lxml/trunk/src/lxml/classlookup.pxi Tue Sep 4 09:57:19 2007
@@ -231,7 +231,7 @@
cdef class CustomElementClassLookup(FallbackElementClassLookup):
"""Element class lookup based on a subclass method.
- You can inherit from this class and override the method
+ You can inherit from this class and override the method::
lookup(self, type, doc, namespace, name)
Modified: lxml/trunk/src/lxml/objectify.pyx
==============================================================================
--- lxml/trunk/src/lxml/objectify.pyx (original)
+++ lxml/trunk/src/lxml/objectify.pyx Tue Sep 4 09:57:19 2007
@@ -266,12 +266,14 @@
_appendValue(self, _buildChildTag(self, tag), value)
def __getitem__(self, key):
- """Return a sibling, counting from the first child of the parent.
+ """Return a sibling, counting from the first child of the parent. The
+ method behaves like both a dict and a sequence.
* If argument is an integer, returns the sibling at that position.
- * If argument is a string, does the same as getattr(). This is used
- to provide namespaces for element lookup.
+ * If argument is a string, does the same as getattr(). This can be
+ used to provide namespaces for element lookup, or to look up
+ children with special names (``text`` etc.).
"""
cdef tree.xmlNode* c_self_node
cdef tree.xmlNode* c_parent
Modified: lxml/trunk/src/lxml/sax.py
==============================================================================
--- lxml/trunk/src/lxml/sax.py (original)
+++ lxml/trunk/src/lxml/sax.py Tue Sep 4 09:57:19 2007
@@ -1,8 +1,9 @@
from xml.sax.handler import ContentHandler
-from etree import ElementTree, Element, SubElement, LxmlError
-from etree import XML, Comment, ProcessingInstruction
+import etree
+from etree import ElementTree, SubElement
+from etree import Comment, ProcessingInstruction
-class SaxError(LxmlError):
+class SaxError(etree.LxmlError):
"""General SAX error.
"""
pass
@@ -24,7 +25,7 @@
self._ns_mapping = { None : [None] }
self._new_mappings = {}
if makeelement is None:
- makeelement = Element
+ makeelement = etree.Element
self._makeelement = makeelement
def _get_etree(self):
From lxml-checkins at codespeak.net Tue Sep 4 21:30:02 2007
From: lxml-checkins at codespeak.net (Viagra.com Inc ®)
Date: Tue, 4 Sep 2007 21:30:02 +0200 (CEST)
Subject: [Lxml-checkins] Official Site
Message-ID: <11023058544.0412311336185.567948300-5610@cimail939.msn.com>
An HTML attachment was scrubbed...
URL: http://codespeak.net/pipermail/lxml-checkins/attachments/20070904/31e65509/attachment.htm
From scoder at codespeak.net Tue Sep 4 21:32:51 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Tue, 4 Sep 2007 21:32:51 +0200 (CEST)
Subject: [Lxml-checkins] r46311 - lxml/trunk/src/lxml
Message-ID: <20070904193251.C0F248130@code0.codespeak.net>
Author: scoder
Date: Tue Sep 4 21:32:51 2007
New Revision: 46311
Modified:
lxml/trunk/src/lxml/parser.pxi
Log:
feed parser fix
Modified: lxml/trunk/src/lxml/parser.pxi
==============================================================================
--- lxml/trunk/src/lxml/parser.pxi (original)
+++ lxml/trunk/src/lxml/parser.pxi Tue Sep 4 21:32:51 2007
@@ -1,4 +1,4 @@
-# XML parser that provides dictionary sharing
+# Parsers for XML and HTML
cimport xmlparser
cimport htmlparser
@@ -533,11 +533,6 @@
else:
raise TypeError, "Parsing requires string data"
- if py_buffer_len > python.INT_MAX:
- buffer_len = python.INT_MAX
- else:
- buffer_len = py_buffer_len
-
pctxt = self._parser_ctxt
error = 0
if not self._feed_parser_running:
@@ -546,18 +541,26 @@
self._error_log.connect()
__GLOBAL_PARSER_CONTEXT.initParserDict(pctxt)
xmlparser.xmlCtxtUseOptions(pctxt, self._parse_options)
+
+ if py_buffer_len > python.INT_MAX:
+ buffer_len = python.INT_MAX
+ else:
+ buffer_len = py_buffer_len
+
error = xmlparser.xmlCtxtResetPush(
- pctxt, c_data, buffer_len, NULL, c_encoding)
+ pctxt, c_data, buffer_len, NULL, c_encoding)
+
py_buffer_len = py_buffer_len - buffer_len
+ c_data = c_data + buffer_len
while error == 0 and py_buffer_len > 0:
- c_data = c_data + buffer_len
if py_buffer_len > python.INT_MAX:
buffer_len = python.INT_MAX
else:
buffer_len = py_buffer_len
py_buffer_len = py_buffer_len - buffer_len
error = xmlparser.xmlParseChunk(pctxt, c_data, buffer_len, 0)
+ c_data = c_data + buffer_len
if error:
self._feed_parser_running = 0
From scoder at codespeak.net Tue Sep 4 21:33:53 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Tue, 4 Sep 2007 21:33:53 +0200 (CEST)
Subject: [Lxml-checkins] r46312 - lxml/trunk/src/lxml/tests
Message-ID: <20070904193353.BE2CC8130@code0.codespeak.net>
Author: scoder
Date: Tue Sep 4 21:33:53 2007
New Revision: 46312
Modified:
lxml/trunk/src/lxml/tests/test_elementtree.py
Log:
test case for broken feed parser input
Modified: lxml/trunk/src/lxml/tests/test_elementtree.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_elementtree.py (original)
+++ lxml/trunk/src/lxml/tests/test_elementtree.py Tue Sep 4 21:33:53 2007
@@ -2566,6 +2566,19 @@
self.assertRaises(Exception, parser.close)
+ def test_feed_parser_error_broken(self):
+ parser = self.etree.XMLParser()
+
+ parser.feed('
Author: ianb
Date: Thu Sep 6 17:40:00 2007
New Revision: 46372
Modified:
lxml/trunk/src/lxml/html/clean.py
Log:
typo in copy
Modified: lxml/trunk/src/lxml/html/clean.py
==============================================================================
--- lxml/trunk/src/lxml/html/clean.py (original)
+++ lxml/trunk/src/lxml/html/clean.py Thu Sep 6 17:40:00 2007
@@ -27,6 +27,7 @@
# +ADw-SCRIPT+AD4-alert('XSS');+ADw-/SCRIPT+AD4-
# you don't always have to have the charset set, if the page has no charset
# and there's UTF7-like code in it.
+# Look at these tests: http://htmlpurifier.org/live/smoketests/xssAttacks.php
# This is an IE-specific construct you can have in a stylesheet to
@@ -355,7 +356,7 @@
doc = fromstring(html)
else:
return_string = False
- doc = copy.deepcopy(doc)
+ doc = copy.deepcopy(html)
self(doc)
if return_string:
return tostring(doc)
From lxml-checkins at codespeak.net Fri Sep 7 02:54:50 2007
From: lxml-checkins at codespeak.net (lxml-checkins at codespeak.net)
Date: Fri, 7 Sep 2007 02:54:50 +0200 (CEST)
Subject: [Lxml-checkins] apnalbdq lxml-checkins@codespeak.net Offer
Message-ID: <20070907145617.23681.qmail@adsl-pool-222.123.32-32.tttmaxnet.com>
An HTML attachment was scrubbed...
URL: http://codespeak.net/pipermail/lxml-checkins/attachments/20070907/39b7c150/attachment.htm
From scoder at codespeak.net Mon Sep 10 14:20:58 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Mon, 10 Sep 2007 14:20:58 +0200 (CEST)
Subject: [Lxml-checkins] r46438 - lxml/trunk
Message-ID: <20070910122058.A18CF810E@code0.codespeak.net>
Author: scoder
Date: Mon Sep 10 14:20:57 2007
New Revision: 46438
Modified:
lxml/trunk/setup.py
lxml/trunk/versioninfo.py
Log:
drop branch link in pre-releases
Modified: lxml/trunk/setup.py
==============================================================================
--- lxml/trunk/setup.py (original)
+++ lxml/trunk/setup.py Mon Sep 10 14:20:57 2007
@@ -41,6 +41,19 @@
print "Building lxml version", svn_version
+branch_link = """
+After an official release of a new stable series, current bug fixes become
+available at http://codespeak.net/svn/lxml/branch/lxml-%(branch_version)s .
+Running ``easy_install lxml==%(branch_version)sbugfix`` will install this
+version from
+http://codespeak.net/svn/lxml/branch/lxml-%(branch_version)s#egg=lxml-%(branch_version)sbugfix
+
+"""
+
+if versioninfo.is_pre_release():
+ branch_link = ""
+
+
extra_options.update(setupinfo.extra_setup_args())
setup(
@@ -55,7 +68,7 @@
description="Powerful and Pythonic XML processing library combining libxml2/libxslt with the ElementTree API.",
- long_description=(("""\
+ long_description=((("""\
lxml is a Pythonic, mature binding for the libxml2 and libxslt libraries. It
provides safe and convenient access to these libraries using the ElementTree
API.
@@ -71,13 +84,7 @@
Running ``easy_install lxml==dev`` will install it from
http://codespeak.net/svn/lxml/trunk#egg=lxml-dev
-After an official release of a new stable series, current bug fixes might
-become available at
-http://codespeak.net/svn/lxml/branch/lxml-%(branch_version)s . Running
-``easy_install lxml==%(branch_version)sbugfix`` will install this version from
-http://codespeak.net/svn/lxml/branch/lxml-%(branch_version)s#egg=lxml-%(branch_version)sbugfix
-
-""" % { "branch_version" : versioninfo.branch_version() }) +
+""" + branch_link) % { "branch_version" : versioninfo.branch_version() }) +
versioninfo.changes()),
classifiers = [
versioninfo.dev_status(),
Modified: lxml/trunk/versioninfo.py
==============================================================================
--- lxml/trunk/versioninfo.py (original)
+++ lxml/trunk/versioninfo.py Mon Sep 10 14:20:57 2007
@@ -11,6 +11,10 @@
def branch_version():
return version()[:3]
+def is_pre_release():
+ version_string = version()
+ return "dev" in version_string or "alpha" in version_string or "beta" in version_string
+
def svn_version():
_version = version()
src_dir = get_src_dir()
From scoder at codespeak.net Mon Sep 10 14:21:10 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Mon, 10 Sep 2007 14:21:10 +0200 (CEST)
Subject: [Lxml-checkins] r46439 - lxml/trunk/doc
Message-ID: <20070910122110.D99C48111@code0.codespeak.net>
Author: scoder
Date: Mon Sep 10 14:21:10 2007
New Revision: 46439
Modified:
lxml/trunk/doc/parsing.txt
Log:
small clarification in docs
Modified: lxml/trunk/doc/parsing.txt
==============================================================================
--- lxml/trunk/doc/parsing.txt (original)
+++ lxml/trunk/doc/parsing.txt Mon Sep 10 14:21:10 2007
@@ -182,7 +182,8 @@
Since lxml 2.0, the parsers have a feed parser interface that is compatible with
the `ElementTree parsers`_. You can use it to feed data into the parser in a
controlled step-by-step way. Note that you can only use one interface at a
-time: the ``parse()`` or ``XML()`` functions, or the feed parser interface.
+time with each parser: the ``parse()`` or ``XML()`` functions, or the feed
+parser interface.
.. _`ElementTree parsers`: http://effbot.org/elementtree/elementtree-xmlparser.htm
From scoder at codespeak.net Mon Sep 10 16:23:24 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Mon, 10 Sep 2007 16:23:24 +0200 (CEST)
Subject: [Lxml-checkins] r46441 - in lxml/trunk: . src/lxml src/lxml/tests
Message-ID: <20070910142324.3E714810F@code0.codespeak.net>
Author: scoder
Date: Mon Sep 10 16:23:22 2007
New Revision: 46441
Modified:
lxml/trunk/selftest.py
lxml/trunk/src/lxml/cstd.pxd
lxml/trunk/src/lxml/docloader.pxi
lxml/trunk/src/lxml/dtd.pxi
lxml/trunk/src/lxml/etree.pyx
lxml/trunk/src/lxml/htmlparser.pxd
lxml/trunk/src/lxml/iterparse.pxi
lxml/trunk/src/lxml/parser.pxi
lxml/trunk/src/lxml/tests/test_elementtree.py
lxml/trunk/src/lxml/xmlparser.pxd
Log:
major restructuring of the parser code to better integrate feed parser and (the new) target parser
Modified: lxml/trunk/selftest.py
==============================================================================
--- lxml/trunk/selftest.py (original)
+++ lxml/trunk/selftest.py Mon Sep 10 16:23:22 2007
@@ -266,7 +266,8 @@
## """
## Test HTML parsing.
-## >>> p = HTMLTreeBuilder.TreeBuilder()
+## >>> # p = HTMLTreeBuilder.TreeBuilder()
+## >>> p = ElementTree.HTMLParser()
## >>> p.feed("spamegg
")
## >>> serialize(p.close())
## 'spamegg
'
Modified: lxml/trunk/src/lxml/cstd.pxd
==============================================================================
--- lxml/trunk/src/lxml/cstd.pxd (original)
+++ lxml/trunk/src/lxml/cstd.pxd Mon Sep 10 16:23:22 2007
@@ -13,6 +13,7 @@
cdef int strcmp(char* s1, char* s2)
cdef int strncmp(char* s1, char* s2, size_t len)
cdef void* memcpy(void* dest, void* src, size_t len)
+ cdef void* memset(void* s, int c, size_t len)
cdef extern from "stdarg.h":
ctypedef void *va_list
Modified: lxml/trunk/src/lxml/docloader.pxi
==============================================================================
--- lxml/trunk/src/lxml/docloader.pxi (original)
+++ lxml/trunk/src/lxml/docloader.pxi Mon Sep 10 16:23:22 2007
@@ -94,9 +94,12 @@
cdef class _ResolverContext(_ExceptionContext):
cdef _ResolverRegistry _resolvers
cdef _TempStore _storage
- def __init__(self, _ResolverRegistry resolvers not None):
+ def __init__(self, _ResolverRegistry resolvers):
_ExceptionContext.__init__(self)
- self._resolvers = resolvers
+ if resolvers is None:
+ self._resolvers = _ResolverRegistry()
+ else:
+ self._resolvers = resolvers
self._storage = _TempStore()
cdef void clear(self):
Modified: lxml/trunk/src/lxml/dtd.pxi
==============================================================================
--- lxml/trunk/src/lxml/dtd.pxi (original)
+++ lxml/trunk/src/lxml/dtd.pxi Mon Sep 10 16:23:22 2007
@@ -88,10 +88,10 @@
cdef tree.xmlDtd* _parseDtdFromFilelike(file) except NULL:
cdef _ExceptionContext exc_context
- cdef _FileParserContext dtd_parser
+ cdef _FileReaderContext dtd_parser
cdef tree.xmlDtd* c_dtd
exc_context = _ExceptionContext()
- dtd_parser = _FileParserContext(file, exc_context)
+ dtd_parser = _FileReaderContext(file, exc_context)
c_dtd = dtd_parser._readDtd()
Modified: lxml/trunk/src/lxml/etree.pyx
==============================================================================
--- lxml/trunk/src/lxml/etree.pyx (original)
+++ lxml/trunk/src/lxml/etree.pyx Mon Sep 10 16:23:22 2007
@@ -2131,19 +2131,20 @@
################################################################################
# Include submodules
-include "proxy.pxi" # Proxy handling (element backpointers/memory/etc.)
-include "apihelpers.pxi" # Private helper functions
-include "xmlerror.pxi" # Error and log handling
-include "classlookup.pxi"# Element class lookup mechanisms
-include "nsclasses.pxi" # Namespace implementation and registry
-include "docloader.pxi" # Support for custom document loaders
-include "parser.pxi" # XML Parser
-include "serializer.pxi" # XML output functions
-include "iterparse.pxi" # incremental XML parsing
-include "xmlid.pxi" # XMLID and IDDict
-include "extensions.pxi" # XPath/XSLT extension functions
-include "xpath.pxi" # XPath evaluation
-include "xslt.pxi" # XSL transformations
+include "proxy.pxi" # Proxy handling (element backpointers/memory/etc.)
+include "apihelpers.pxi" # Private helper functions
+include "xmlerror.pxi" # Error and log handling
+include "classlookup.pxi" # Element class lookup mechanisms
+include "nsclasses.pxi" # Namespace implementation and registry
+include "docloader.pxi" # Support for custom document loaders
+include "parser.pxi" # XML Parser
+include "parsertarget.pxi" # ET Parser target
+include "serializer.pxi" # XML output functions
+include "iterparse.pxi" # incremental XML parsing
+include "xmlid.pxi" # XMLID and IDDict
+include "extensions.pxi" # XPath/XSLT extension functions
+include "xpath.pxi" # XPath evaluation
+include "xslt.pxi" # XSL transformations
################################################################################
Modified: lxml/trunk/src/lxml/htmlparser.pxd
==============================================================================
--- lxml/trunk/src/lxml/htmlparser.pxd (original)
+++ lxml/trunk/src/lxml/htmlparser.pxd Mon Sep 10 16:23:22 2007
@@ -17,7 +17,11 @@
cdef xmlParserCtxt* htmlCreateMemoryParserCtxt(char* buffer, int size)
cdef xmlParserCtxt* htmlCreateFileParserCtxt(char* filename, char* encoding)
cdef void htmlFreeParserCtxt(xmlParserCtxt* ctxt)
+ cdef void htmlCtxtReset(xmlParserCtxt* ctxt)
+ cdef int htmlCtxtUseOptions(xmlParserCtxt* ctxt, int options)
cdef int htmlParseDocument(xmlParserCtxt* ctxt)
+ cdef int htmlParseChunk(xmlParserCtxt* ctxt,
+ char* chunk, int size, int terminate)
cdef xmlDoc* htmlCtxtReadFile(xmlParserCtxt* ctxt,
char* filename, char* encoding,
Modified: lxml/trunk/src/lxml/iterparse.pxi
==============================================================================
--- lxml/trunk/src/lxml/iterparse.pxi (original)
+++ lxml/trunk/src/lxml/iterparse.pxi Mon Sep 10 16:23:22 2007
@@ -48,7 +48,7 @@
c_ns = c_ns.next
return count
-cdef class _IterparseContext(_ResolverContext):
+cdef class _IterparseContext(_ParserContext):
cdef xmlparser.startElementNsSAX2Func _origSaxStart
cdef xmlparser.endElementNsSAX2Func _origSaxEnd
cdef _Element _root
@@ -64,8 +64,8 @@
cdef char* _tag_href
cdef char* _tag_name
- def __init__(self, _ResolverRegistry resolvers):
- _ResolverContext.__init__(self, resolvers)
+ def __init__(self):
+ _ParserContext.__init__(self)
self._ns_stack = []
self._pop_ns = self._ns_stack.pop
self._node_stack = []
@@ -73,22 +73,25 @@
self._events = []
self._event_index = 0
- cdef void _wrapCallbacks(self, xmlparser.xmlSAXHandler* sax):
+ cdef void _initParserContext(self, xmlparser.xmlParserCtxt* c_ctxt):
"wrap original SAX2 callbacks"
+ cdef xmlparser.xmlSAXHandler* sax
+ _ParserContext._initParserContext(self, c_ctxt)
+ sax = c_ctxt.sax
self._origSaxStart = sax.startElementNs
# only override start event handler if needed
if self._event_filter == 0 or \
self._event_filter & (ITERPARSE_FILTER_START | \
ITERPARSE_FILTER_START_NS | \
ITERPARSE_FILTER_END_NS):
- sax.startElementNs = _saxStart
+ sax.startElementNs = _iterparseSaxStart
self._origSaxEnd = sax.endElementNs
# only override end event handler if needed
if self._event_filter == 0 or \
self._event_filter & (ITERPARSE_FILTER_END | \
ITERPARSE_FILTER_END_NS):
- sax.endElementNs = _saxEnd
+ sax.endElementNs = _iterparseSaxEnd
cdef _setEventFilter(self, events, tag):
self._event_filter = _buildIterparseEventFilter(events)
@@ -184,9 +187,10 @@
cdef xmlparser.endElementNsSAX2Func _getOrigEnd(xmlparser.xmlParserCtxt* c_ctxt):
return (<_IterparseContext>c_ctxt._private)._origSaxEnd
-cdef void _saxStart(void* ctxt, char* localname, char* prefix, char* URI,
- int nb_namespaces, char** namespaces,
- int nb_attributes, int nb_defaulted, char** attributes):
+cdef void _iterparseSaxStart(void* ctxt, char* localname, char* prefix,
+ char* URI, int nb_namespaces, char** namespaces,
+ int nb_attributes, int nb_defaulted,
+ char** attributes):
# no Python in here!
cdef xmlparser.xmlParserCtxt* c_ctxt
cdef xmlparser.startElementNsSAX2Func origStart
@@ -196,7 +200,7 @@
nb_attributes, nb_defaulted, attributes)
_pushSaxStartEvent(c_ctxt, c_ctxt.node)
-cdef void _saxEnd(void* ctxt, char* localname, char* prefix, char* URI):
+cdef void _iterparseSaxEnd(void* ctxt, char* localname, char* prefix, char* URI):
# no Python in here!
cdef xmlparser.xmlParserCtxt* c_ctxt
cdef xmlparser.endElementNsSAX2Func origEnd
@@ -276,15 +280,17 @@
parse_options = parse_options | xmlparser.XML_PARSE_NOBLANKS
_BaseParser.__init__(self, parse_options, remove_comments, remove_pis,
- _IterparseContext)
+ None)
context = <_IterparseContext>self._context
context._setEventFilter(events, tag)
- context._wrapCallbacks(self._parser_ctxt.sax)
xmlparser.xmlCtxtUseOptions(self._parser_ctxt, parse_options)
xmlparser.xmlCtxtResetPush(self._parser_ctxt, NULL, 0, c_filename, NULL)
self._lockParser() # will not be unlocked - no other methods supported
+ cdef _ParserContext _createContext(self, target):
+ return _IterparseContext()
+
def __iter__(self):
return self
@@ -318,7 +324,8 @@
break
if error != 0:
self._source = None
- _raiseParseError(self._parser_ctxt, self._filename, None)
+ _raiseParseError(self._parser_ctxt, self._filename,
+ self._context._error_log)
if python.PyList_GET_SIZE(context._events) == 0:
self.root = context._root
self._source = None
Modified: lxml/trunk/src/lxml/parser.pxi
==============================================================================
--- lxml/trunk/src/lxml/parser.pxi (original)
+++ lxml/trunk/src/lxml/parser.pxi Mon Sep 10 16:23:22 2007
@@ -2,7 +2,6 @@
cimport xmlparser
cimport htmlparser
-from xmlparser cimport xmlParserCtxt, xmlDict
class ParseError(LxmlSyntaxError):
"""Syntax error while parsing an XML document.
@@ -26,17 +25,17 @@
LXML_HTML_PARSER
LXML_ITERPARSE_PARSER
-cdef class _ParserContext:
+cdef class _ParserDictionaryContext:
# Global parser context to share the string dictionary.
#
- # This class is a singleton!
+ # This class is a delegate singleton!
#
- # It creates _ParserContext objects for each thread to keep thread state,
+ # It creates _ParserDictionaryContext objects for each thread to keep thread state,
# but those must never be used directly. Always stick to using the static
# __GLOBAL_PARSER_CONTEXT as defined below the class.
#
- cdef xmlDict* _c_dict
+ cdef tree.xmlDict* _c_dict
cdef _BaseParser _default_parser
def __dealloc__(self):
if self._c_dict is not NULL:
@@ -49,33 +48,33 @@
cdef python.PyObject* result
thread_dict = python.PyThreadState_GetDict()
if thread_dict is not NULL:
- python.PyDict_SetItem(