[Lxml-checkins] r44178 - in lxml/branch/lxml-1.3: . benchmark doc doc/html src/lxml src/lxml/tests
scoder at codespeak.net
scoder at codespeak.net
Tue Jun 12 18:28:51 CEST 2007
Author: scoder
Date: Tue Jun 12 18:28:50 2007
New Revision: 44178
Modified:
lxml/branch/lxml-1.3/CHANGES.txt
lxml/branch/lxml-1.3/benchmark/bench_etree.py
lxml/branch/lxml-1.3/benchmark/bench_objectify.py
lxml/branch/lxml-1.3/benchmark/bench_xpath.py
lxml/branch/lxml-1.3/benchmark/benchbase.py
lxml/branch/lxml-1.3/doc/api.txt
lxml/branch/lxml-1.3/doc/html/style.css
lxml/branch/lxml-1.3/doc/mkhtml.py
lxml/branch/lxml-1.3/doc/performance.txt
lxml/branch/lxml-1.3/src/lxml/etree.pyx
lxml/branch/lxml-1.3/src/lxml/tests/test_etree.py
Log:
merged in revs 42061:42203 from trunk
Modified: lxml/branch/lxml-1.3/CHANGES.txt
==============================================================================
--- lxml/branch/lxml-1.3/CHANGES.txt (original)
+++ lxml/branch/lxml-1.3/CHANGES.txt Tue Jun 12 18:28:50 2007
@@ -8,6 +8,8 @@
Features added
--------------
+* Element.attrib now has a ``pop()`` method
+
* Support for custom Element class instantiation in lxml.sax
* '.' represents empty ObjectPath (identity)
Modified: lxml/branch/lxml-1.3/benchmark/bench_etree.py
==============================================================================
--- lxml/branch/lxml-1.3/benchmark/bench_etree.py (original)
+++ lxml/branch/lxml-1.3/benchmark/bench_etree.py Tue Jun 12 18:28:50 2007
@@ -3,7 +3,7 @@
from StringIO import StringIO
import benchbase
-from benchbase import with_attributes, with_text, onlylib, serialized
+from benchbase import with_attributes, with_text, onlylib, serialized, children
############################################################
# Benchmarks
@@ -77,8 +77,10 @@
root1.append(el)
def bench_insert_from_document(self, root1, root2):
+ pos = len(root1)/2
for el in root2:
- root1.insert(len(root1)/2, el)
+ root1.insert(pos, el)
+ pos = pos + 1
def bench_rotate_children(self, root):
# == "1 2 3" # runs on any single tree independently
@@ -102,18 +104,21 @@
def bench_clear(self, root):
root.clear()
- def bench_has_children(self, root):
- for child in root:
+ @children
+ def bench_has_children(self, children):
+ for child in children:
if child and child and child and child and child:
pass
- def bench_len(self, root):
- for child in root:
+ @children
+ def bench_len(self, children):
+ for child in children:
map(len, repeat(child, 20))
- def bench_create_subelements(self, root):
+ @children
+ def bench_create_subelements(self, children):
SubElement = self.etree.SubElement
- for child in root:
+ for child in children:
SubElement(child, '{test}test')
def bench_append_elements(self, root):
@@ -122,103 +127,120 @@
el = Element('{test}test')
child.append(el)
- def bench_makeelement(self, root):
+ @children
+ def bench_makeelement(self, children):
empty_attrib = {}
- for child in root:
+ for child in children:
child.makeelement('{test}test', empty_attrib)
- def bench_create_elements(self, root):
+ @children
+ def bench_create_elements(self, children):
Element = self.etree.Element
- for child in root:
+ for child in children:
Element('{test}test')
- def bench_replace_children_element(self, root):
+ @children
+ def bench_replace_children_element(self, children):
Element = self.etree.Element
- for child in root:
+ for child in children:
el = Element('{test}test')
child[:] = [el]
- def bench_replace_children(self, root):
- Element = self.etree.Element
- for child in root:
- child[:] = [ child[0] ]
+ @children
+ def bench_replace_children(self, children):
+ els = [ self.etree.Element("newchild") ]
+ for child in children:
+ child[:] = els
def bench_remove_children(self, root):
for child in root:
root.remove(child)
def bench_remove_children_reversed(self, root):
- for child in reversed(root[:]):
+ for child in reversed(root):
root.remove(child)
- def bench_set_attributes(self, root):
- for child in root:
+ @children
+ def bench_set_attributes(self, children):
+ for child in children:
child.set('a', 'bla')
@with_attributes(True)
- def bench_get_attributes(self, root):
- for child in root:
+ @children
+ def bench_get_attributes(self, children):
+ for child in children:
child.get('bla1')
child.get('{attr}test1')
- def bench_setget_attributes(self, root):
- for child in root:
+ @children
+ def bench_setget_attributes(self, children):
+ for child in children:
child.set('a', 'bla')
- for child in root:
+ for child in children:
child.get('a')
def bench_root_getchildren(self, root):
root.getchildren()
- def bench_getchildren(self, root):
- for child in root:
+ @children
+ def bench_getchildren(self, children):
+ for child in children:
child.getchildren()
- def bench_get_children_slice(self, root):
- for child in root:
+ @children
+ def bench_get_children_slice(self, children):
+ for child in children:
child[:]
- def bench_get_children_slice_2x(self, root):
- for child in root:
- children = child[:]
+ @children
+ def bench_get_children_slice_2x(self, children):
+ for child in children:
+ child[:]
child[:]
- def bench_deepcopy(self, root):
- for child in root:
+ @children
+ def bench_deepcopy(self, children):
+ for child in children:
copy.deepcopy(child)
def bench_deepcopy_all(self, root):
copy.deepcopy(root)
- def bench_tag(self, root):
- for child in root:
+ @children
+ def bench_tag(self, children):
+ for child in children:
child.tag
- def bench_tag_repeat(self, root):
- for child in root:
+ @children
+ def bench_tag_repeat(self, children):
+ for child in children:
for i in repeat(0, 100):
child.tag
@with_text(utext=True, text=True, no_text=True)
- def bench_text(self, root):
- for child in root:
+ @children
+ def bench_text(self, children):
+ for child in children:
child.text
@with_text(utext=True, text=True, no_text=True)
- def bench_text_repeat(self, root):
+ @children
+ def bench_text_repeat(self, children):
repeat = range(500)
- for child in root:
+ for child in children:
for i in repeat:
child.text
- def bench_set_text(self, root):
+ @children
+ def bench_set_text(self, children):
text = TEXT
- for child in root:
+ for child in children:
child.text = text
- def bench_set_utext(self, root):
+ @children
+ def bench_set_utext(self, children):
text = UTEXT
- for child in root:
+ for child in children:
child.text = text
@onlylib('lxe')
Modified: lxml/branch/lxml-1.3/benchmark/bench_objectify.py
==============================================================================
--- lxml/branch/lxml-1.3/benchmark/bench_objectify.py (original)
+++ lxml/branch/lxml-1.3/benchmark/bench_objectify.py Tue Jun 12 18:28:50 2007
@@ -10,6 +10,9 @@
############################################################
class BenchMark(benchbase.BenchMarkBase):
+ repeat1000 = range(1000)
+ repeat3000 = range(3000)
+
def __init__(self, lib):
from lxml import etree, objectify
self.objectify = objectify
@@ -20,37 +23,37 @@
def bench_attribute(self, root):
"1 2 4"
- for i in repeat(None, 3000):
+ for i in self.repeat3000:
root.zzzzz
def bench_attribute_cached(self, root):
"1 2 4"
cache = root.zzzzz
- for i in repeat(None, 3000):
+ for i in self.repeat3000:
root.zzzzz
def bench_attributes_deep(self, root):
"1 2 4"
- for i in repeat(None, 3000):
+ for i in self.repeat3000:
root.zzzzz['{cdefg}z00000']
def bench_attributes_deep_cached(self, root):
"1 2 4"
cache1 = root.zzzzz
cache2 = cache1['{cdefg}z00000']
- for i in repeat(None, 3000):
+ for i in self.repeat3000:
root.zzzzz['{cdefg}z00000']
def bench_objectpath(self, root):
"1 2 4"
path = self.objectify.ObjectPath(".zzzzz")
- for i in repeat(None, 3000):
+ for i in self.repeat3000:
path(root)
def bench_objectpath_deep(self, root):
"1 2 4"
path = self.objectify.ObjectPath(".zzzzz.{cdefg}z00000")
- for i in repeat(None, 3000):
+ for i in self.repeat3000:
path(root)
def bench_objectpath_deep_cached(self, root):
@@ -58,7 +61,7 @@
cache1 = root.zzzzz
cache2 = cache1['{cdefg}z00000']
path = self.objectify.ObjectPath(".zzzzz.{cdefg}z00000")
- for i in repeat(None, 3000):
+ for i in self.repeat3000:
path(root)
@with_text(text=True, utext=True, no_text=True)
@@ -72,7 +75,7 @@
def bench_type_inference(self, root):
"1 2 4"
el = root.aaaaa
- for i in repeat(None, 1000):
+ for i in self.repeat1000:
el.getchildren()
@with_text(text=True)
@@ -80,7 +83,7 @@
"1 2 4"
el = root.aaaaa
self.objectify.annotate(el)
- for i in repeat(None, 1000):
+ for i in self.repeat1000:
el.getchildren()
Modified: lxml/branch/lxml-1.3/benchmark/bench_xpath.py
==============================================================================
--- lxml/branch/lxml-1.3/benchmark/bench_xpath.py (original)
+++ lxml/branch/lxml-1.3/benchmark/bench_xpath.py Tue Jun 12 18:28:50 2007
@@ -3,7 +3,7 @@
from StringIO import StringIO
import benchbase
-from benchbase import with_attributes, with_text, onlylib, serialized
+from benchbase import with_attributes, with_text, onlylib, serialized, children
############################################################
# Benchmarks
@@ -11,14 +11,16 @@
class XPathBenchMark(benchbase.BenchMarkBase):
@onlylib('lxe')
- def bench_xpath_class(self, root):
+ @children
+ def bench_xpath_class(self, children):
xpath = self.etree.XPath("./*[0]")
- for child in root:
+ for child in children:
xpath(child)
@onlylib('lxe')
- def bench_xpath_class_repeat(self, root):
- for child in root:
+ @children
+ def bench_xpath_class_repeat(self, children):
+ for child in children:
xpath = self.etree.XPath("./*[0]")
xpath(child)
@@ -29,12 +31,14 @@
xpath.evaluate("./*[0]")
@onlylib('lxe')
- def bench_xpath_method(self, root):
- for child in root:
+ @children
+ def bench_xpath_method(self, children):
+ for child in children:
child.xpath("./*[0]")
@onlylib('lxe')
- def bench_xpath_old_extensions(self, root):
+ @children
+ def bench_xpath_old_extensions(self, children):
def return_child(_, element):
if element:
return element[0]
@@ -42,11 +46,12 @@
return ()
extensions = {(None, 'child') : return_child}
xpath = self.etree.XPath("child(.)", extensions=extensions)
- for child in root:
+ for child in children:
xpath(child)
@onlylib('lxe')
- def bench_xpath_extensions(self, root):
+ @children
+ def bench_xpath_extensions(self, children):
def return_child(_, element):
if element:
return element[0]
@@ -56,7 +61,7 @@
try:
xpath = self.etree.XPath("test:t(.)", {"test":"test"})
- for child in root:
+ for child in children:
xpath(child)
finally:
del self.etree.FunctionNamespace("test")["t"]
Modified: lxml/branch/lxml-1.3/benchmark/benchbase.py
==============================================================================
--- lxml/branch/lxml-1.3/benchmark/benchbase.py (original)
+++ lxml/branch/lxml-1.3/benchmark/benchbase.py Tue Jun 12 18:28:50 2007
@@ -78,6 +78,11 @@
function.STRING = True
return function
+def children(function):
+ "Decorator for benchmarks that require a list of root children"
+ function.CHILDREN = True
+ return function
+
############################################################
# benchmark baseclass
############################################################
@@ -105,13 +110,18 @@
deepcopy = copy.deepcopy
def set_property(root, fname):
xml = self._serialize_tree(root)
- setattr(self, fname, lambda : etree.XML(xml, etree_parser))
+ if etree_parser is not None:
+ setattr(self, fname, lambda : etree.XML(xml, etree_parser))
+ else:
+ setattr(self, fname, lambda : deepcopy(root))
setattr(self, fname + '_xml', lambda : xml)
+ setattr(self, fname + '_children', lambda : root[:])
else:
def set_property(root, fname):
setattr(self, fname, self.et_make_clone_factory(root))
xml = self._serialize_tree(root)
setattr(self, fname + '_xml', lambda : xml)
+ setattr(self, fname + '_children', lambda : root[:])
attribute_list = list(izip(count(), ({}, _ATTRIBUTES)))
text_list = list(izip(count(), (None, _TEXT, _UTEXT)))
@@ -131,10 +141,12 @@
def _tree_builder_name(self, tree, tn, an):
return '_root%d_T%d_A%d' % (tree, tn, an)
- def tree_builder(self, tree, tn, an, serial):
+ def tree_builder(self, tree, tn, an, serial, children):
name = self._tree_builder_name(tree, tn, an)
if serial:
name += '_xml'
+ elif children:
+ name += '_children'
return getattr(self, name)
def _serialize_tree(self, root):
@@ -270,13 +282,14 @@
arg_count = 1
tree_tuples = self._permutations(all_trees, arg_count)
- serialized = getattr(method, 'STRING', False)
+ serialized = getattr(method, 'STRING', False)
+ children = getattr(method, 'CHILDREN', False)
for tree_tuple in tree_tuples:
for tn in sorted(getattr(method, 'TEXT', (0,))):
for an in sorted(getattr(method, 'ATTRIBUTES', (0,))):
benchmarks.append((name, method_call, tree_tuple,
- tn, an, serialized))
+ tn, an, serialized, children))
return benchmarks
@@ -315,11 +328,12 @@
return (benchmark_suites, benchmarks)
-def build_treeset_name(trees, tn, an, serialized):
+def build_treeset_name(trees, tn, an, serialized, children):
text = {0:'-', 1:'S', 2:'U'}[tn]
attr = {0:'-', 1:'A'}[an]
ser = {True:'X', False:'T'}[serialized]
- return "%s%s%s T%s" % (text, attr, ser, ',T'.join(imap(str, trees))[:6])
+ chd = {True:'C', False:'R'}[children]
+ return "%s%s%s%s T%s" % (text, attr, ser, chd, ',T'.join(imap(str, trees))[:6])
def printSetupTimes(benchmark_suites):
print "Setup times for trees in seconds:"
@@ -327,20 +341,20 @@
print "%-3s: " % b.lib_name,
for an in (0,1):
for tn in (0,1,2):
- print ' %s ' % build_treeset_name((), tn, an, False)[:2],
+ print ' %s ' % build_treeset_name((), tn, an, False, False)[:2],
print
for i, tree_times in enumerate(b.setup_times):
print " T%d:" % (i+1), ' '.join("%6.4f" % t for t in tree_times)
print
-def runBench(suite, method_name, method_call, tree_set, tn, an, serial):
+def runBench(suite, method_name, method_call, tree_set, tn, an, serial, children):
if method_call is None:
raise SkippedTest
current_time = time.time
call_repeat = range(10)
- tree_builders = [ suite.tree_builder(tree, tn, an, serial)
+ tree_builders = [ suite.tree_builder(tree, tn, an, serial, children)
for tree in tree_set ]
times = []
@@ -348,14 +362,17 @@
for i in range(3):
gc.collect()
gc.disable()
- t = 0
+ t = -1
for i in call_repeat:
args = [ build() for build in tree_builders ]
t_one_call = current_time()
method_call(*args)
- t += current_time() - t_one_call
- t = 1000.0 * t / len(call_repeat)
- times.append(t)
+ t_one_call = current_time() - t_one_call
+ if t < 0:
+ t = t_one_call
+ else:
+ t = min(t, t_one_call)
+ times.append(1000.0 * t)
gc.enable()
del args
return times
@@ -364,7 +381,7 @@
for bench_calls in izip(*benchmarks):
for lib, (bench, benchmark_setup) in enumerate(izip(benchmark_suites, bench_calls)):
bench_name = benchmark_setup[0]
- tree_set_name = build_treeset_name(*benchmark_setup[-4:])
+ tree_set_name = build_treeset_name(*benchmark_setup[-5:])
print "%-3s: %-28s" % (bench.lib_name, bench_name[6:34]),
print "(%-10s)" % tree_set_name,
sys.stdout.flush()
Modified: lxml/branch/lxml-1.3/doc/api.txt
==============================================================================
--- lxml/branch/lxml-1.3/doc/api.txt (original)
+++ lxml/branch/lxml-1.3/doc/api.txt Tue Jun 12 18:28:50 2007
@@ -31,8 +31,9 @@
3 Trees and Documents
4 Iteration
5 Error handling on exceptions
- 6 xinclude
- 7 write_c14n on ElementTree
+ 6 Serialisation
+ 7 xinclude
+ 8 write_c14n on ElementTree
lxml.etree
@@ -62,17 +63,16 @@
While lxml.etree itself uses the ElementTree API, it is possible to replace
the Element implementation by `custom element subclasses`_. This has been
-used to implement well-known XML APIs on top of lxml. The ``lxml.elements``
-package contains examples. Currently, there is a data-binding implementation
-called `objectify`_, which is similar to the `Amara bindery`_ tool.
-
-Additionally, the `lxml.elements.classlookup`_ module provides a number of
-different schemes to customize the mapping between libxml2 nodes and the
-Element classes used by lxml.etree.
+used to implement well-known XML APIs on top of lxml. For example, lxml ships
+with a data-binding implementation called `objectify`_, which is similar to
+the `Amara bindery`_ tool.
+
+lxml.etree comes with a number of `different lookup schemes`_ to customize the
+mapping between libxml2 nodes and the Element classes used by lxml.etree.
.. _`custom element subclasses`: namespace_extensions.html
.. _`objectify`: objectify.html
-.. _`lxml.elements.classlookup`: elements.html#lxml.elements.classlookup
+.. _`different lookup schemes`: element_classes.html#setting-up-a-class-lookup-scheme
.. _`Amara bindery`: http://uche.ogbuji.net/tech/4suite/amara/
@@ -228,7 +228,36 @@
etc. which are described in their respective sections below.
-xinclude
+Serialisation
+-------------
+
+lxml.etree has direct support for pretty printing XML output. Functions like
+``ElementTree.write()`` and ``tostring()`` support it through a keyword
+argument::
+
+ >>> root = etree.XML("<root><test/></root>")
+ >>> print etree.tostring(root)
+ <root><test/></root>
+
+ >>> print etree.tostring(root, pretty_print=True)
+ <root>
+ <test/>
+ </root>
+
+By default, lxml (and ElementTree) output the XML declaration only if it is
+required. You can enable or disable it explicitly by passing another keyword
+argument for the serialisation::
+
+ >>> print etree.tostring(root, xml_declaration=True)
+ <?xml version='1.0' encoding='ASCII'?>
+ <root><test/></root>
+
+Also see the general remarks on `Unicode support`_.
+
+.. _`Unicode support`: parsing.html#python-unicode-strings
+
+
+XInclude
--------
Simple XInclude support exists. You can let lxml process xinclude statements
Modified: lxml/branch/lxml-1.3/doc/html/style.css
==============================================================================
--- lxml/branch/lxml-1.3/doc/html/style.css (original)
+++ lxml/branch/lxml-1.3/doc/html/style.css Tue Jun 12 18:28:50 2007
@@ -1,15 +1,15 @@
body {
- /* CSS Hack for IE that does not respect the "margin: auto" rule at the
- * document level */
+ font: 13px Arial, Verdana, Helvetica, sans-serif;
text-align: center;
- padding: 1em;
}
-
@media screen {
+ body {
+ padding: 1em 1em 1em 21em;
+ }
+
div.document {
width: 45em;
- padding-left: 21em;
background-color: white;
}
}
@@ -26,7 +26,6 @@
}
div.document {
- font: 13px Arial, Verdana, Helvetica, sans-serif;
margin: 1em auto 1em auto;
color: #222;
text-align: left;
@@ -50,7 +49,7 @@
/*** side menu ***/
div.sidemenu {
- position: fixed;
+ position: absolute;
top: 0px;
left: 0px;
width: 22em;
@@ -62,6 +61,11 @@
background-color: #FFFAFA;
}
+html > body div.sidemenu {
+ /* ignored by IE -> everyone else knows 'fixed', right? */
+ position: fixed;
+}
+
div.sidemenu span.section.title {
line-height: 1.5em;
font-size: 130%;
Modified: lxml/branch/lxml-1.3/doc/mkhtml.py
==============================================================================
--- lxml/branch/lxml-1.3/doc/mkhtml.py (original)
+++ lxml/branch/lxml-1.3/doc/mkhtml.py Tue Jun 12 18:28:50 2007
@@ -55,7 +55,7 @@
def merge_menu(tree, menu, name):
menu_root = copy.deepcopy(menu)
- tree.getroot()[1][0].append(menu_root) # html->body->div[class=document]
+ tree.getroot()[1][0].insert(0, menu_root) # html->body->div[class=document]
for el in menu_root.getiterator():
tag = el.tag
if tag[0] != '{':
Modified: lxml/branch/lxml-1.3/doc/performance.txt
==============================================================================
--- lxml/branch/lxml-1.3/doc/performance.txt (original)
+++ lxml/branch/lxml-1.3/doc/performance.txt Tue Jun 12 18:28:50 2007
@@ -14,21 +14,26 @@
.. _ElementTree: http://effbot.org/zone/element-index.htm
.. _cElementTree: http://effbot.org/zone/celementtree.htm
-The statements made here are backed by the benchmark script `bench.py`_ that
-comes with the lxml source distribution. The timings cited below compare lxml
-1.0 (with libxml2 2.6.24), ElementTree 1.2.6 and cElementTree 1.0.5 under
-CPython 2.4.2 on a 1.6GHz AMD64 machine.
-
-.. _`bench.py`: http://codespeak.net/svn/lxml/branch/lxml-1.0/bench.py
-
-The ``bench.py`` script runs a number of simple tests on the different
-libraries, using different XML tree configurations: different tree sizes, with
-or without attributes (-/A) and with or without ASCII or unicode text (-/S/U).
-In the result extracts cited below, T1 refers to a 3-level tree with many
-children at the third level, T2 is swapped around to have many children at the
-root element, T3 is a deep tree with few children at each level and T4 is a
-small tree, slightly broader than deep. Most benchmarks run in a loop over
-all children of the tree root.
+The statements made here are backed by the benchmark scripts
+`bench_etree.py`_, `bench_xpath.py`_ and `bench_objectify.py`_ that come with
+the lxml source distribution. The timings cited below compare lxml 1.3 (with
+libxml2 2.6.26) to the ElementTree and cElementTree versions shipped with
+CPython 2.5 (based on ElementTree 1.2.6). They were run single-threaded on a
+1.8GHz Intel Core Duo machine.
+
+.. _`bench_etree.py`: http://codespeak.net/svn/lxml/branch/lxml-1.3/benchmark/bench_etree.py
+.. _`bench_xpath.py`: http://codespeak.net/svn/lxml/branch/lxml-1.3/benchmark/bench_xpath.py
+.. _`bench_objectify.py`: http://codespeak.net/svn/lxml/branch/lxml-1.3/benchmark/bench_objectify.py
+
+The scripts run a number of simple tests on the different libraries, using
+different XML tree configurations: different tree sizes, with or without
+attributes (-/A), with or without ASCII or unicode text (-/S/U), and either
+against a tree or its serialised form (T/X). In the result extracts cited
+below, T1 refers to a 3-level tree with many children at the third level, T2
+is swapped around to have many children at the root element, T3 is a deep tree
+with few children at each level and T4 is a small tree, slightly broader than
+deep. If repetition is involved, this usually means running the benchmark in
+a loop over all children of the tree root.
.. contents::
..
@@ -37,6 +42,7 @@
3 The ElementTree API
4 Tree traversal
5 XPath
+ 6 lxml.objectify
Bad things first
@@ -57,45 +63,57 @@
results are rather impressive. Compared to cElementTree, lxml is about 20 to
40 times faster on serialisation::
- lxe: tostring_utf16 (SA T2) 30.9846 msec/pass
- cET: tostring_utf16 (SA T2) 715.5002 msec/pass
- ET : tostring_utf16 (SA T2) 758.5271 msec/pass
-
- lxe: tostring_utf16 (U- T3) 3.0509 msec/pass
- cET: tostring_utf16 (U- T3) 72.4721 msec/pass
- ET : tostring_utf16 (U- T3) 87.0735 msec/pass
-
- lxe: tostring_utf8 (UA T2) 26.8996 msec/pass
- cET: tostring_utf8 (UA T2) 700.4889 msec/pass
- ET : tostring_utf8 (UA T2) 745.3317 msec/pass
-
- lxe: tostring_utf8 (S- T3) 2.1876 msec/pass
- cET: tostring_utf8 (S- T3) 71.1290 msec/pass
- ET : tostring_utf8 (S- T3) 87.1525 msec/pass
+ lxe: tostring_utf16 (SATR T1) 21.9206 msec/pass
+ cET: tostring_utf16 (SATR T1) 461.9428 msec/pass
+ ET : tostring_utf16 (SATR T1) 486.8946 msec/pass
+
+ lxe: tostring_utf16 (UATR T1) 22.7508 msec/pass
+ cET: tostring_utf16 (UATR T1) 526.3446 msec/pass
+ ET : tostring_utf16 (UATR T1) 496.0767 msec/pass
+
+ lxe: tostring_utf16 (S-TR T2) 23.8452 msec/pass
+ cET: tostring_utf16 (S-TR T2) 537.9200 msec/pass
+ ET : tostring_utf16 (S-TR T2) 504.4273 msec/pass
+
+ lxe: tostring_utf8 (S-TR T2) 18.2550 msec/pass
+ cET: tostring_utf8 (S-TR T2) 528.3908 msec/pass
+ ET : tostring_utf8 (S-TR T2) 549.7071 msec/pass
+
+ lxe: tostring_utf8 (U-TR T3) 2.5497 msec/pass
+ cET: tostring_utf8 (U-TR T3) 49.8495 msec/pass
+ ET : tostring_utf8 (U-TR T3) 62.6927 msec/pass
For parsing, the difference between the libraries is smaller. The (c)ET
libraries use the expat parser, which is known to be extremely fast::
- lxe: parse_stringIO (SA T2) 197.7678 msec/pass
- cET: parse_stringIO (SA T2) 38.9390 msec/pass
- ET : parse_stringIO (SA T2) 364.3468 msec/pass
-
- lxe: parse_stringIO (UA T3) 48.6735 msec/pass
- cET: parse_stringIO (UA T3) 39.7455 msec/pass
- ET : parse_stringIO (UA T3) 237.9971 msec/pass
+ lxe: parse_stringIO (SAXR T1) 150.2380 msec/pass
+ cET: parse_stringIO (SAXR T1) 25.9311 msec/pass
+ ET : parse_stringIO (SAXR T1) 222.9431 msec/pass
+
+ lxe: parse_stringIO (S-XR T3) 5.9490 msec/pass
+ cET: parse_stringIO (S-XR T3) 5.4519 msec/pass
+ ET : parse_stringIO (S-XR T3) 76.4120 msec/pass
+
+ lxe: parse_stringIO (UAXR T3) 29.3601 msec/pass
+ cET: parse_stringIO (UAXR T3) 28.9941 msec/pass
+ ET : parse_stringIO (UAXR T3) 163.5361 msec/pass
The expat parser allows cET to be up to 80% faster than lxml on plain parser
-performance. The same applies to the ``iterparse()`` function. However, if
-you take a complete serialize-parse cycle, the numbers will look similar to
-these::
-
- lxe: write_utf8_parse_stringIO (S- T1) 187.0444 msec/pass
- cET: write_utf8_parse_stringIO (S- T1) 828.4068 msec/pass
- ET : write_utf8_parse_stringIO (S- T1) 1181.0658 msec/pass
-
- lxe: write_utf8_parse_stringIO (UA T2) 213.6599 msec/pass
- cET: write_utf8_parse_stringIO (UA T2) 927.2374 msec/pass
- ET : write_utf8_parse_stringIO (UA T2) 1297.9678 msec/pass
+performance. Similar timings can be observed for the ``iterparse()``
+function. However, if you take a complete serialize-parse cycle, the numbers
+will look similar to these::
+
+ lxe: write_utf8_parse_stringIO (S-TR T1) 316.6230 msec/pass
+ cET: write_utf8_parse_stringIO (S-TR T1) 592.1209 msec/pass
+ ET : write_utf8_parse_stringIO (S-TR T1) 817.9121 msec/pass
+
+ lxe: write_utf8_parse_stringIO (UATR T3) 49.9680 msec/pass
+ cET: write_utf8_parse_stringIO (UATR T3) 434.6111 msec/pass
+ ET : write_utf8_parse_stringIO (UATR T3) 574.1441 msec/pass
+
+ lxe: write_utf8_parse_stringIO (SATR T4) 1.2789 msec/pass
+ cET: write_utf8_parse_stringIO (SATR T4) 12.2640 msec/pass
+ ET : write_utf8_parse_stringIO (SATR T4) 15.6620 msec/pass
For applications that require a high parser throughput and do little
serialization, cET is the best choice. Also for iterparse applications that
@@ -114,22 +132,20 @@
(given in seconds)::
lxe: -- S- U- -A SA UA
- T1: 0.1360 0.1214 0.1214 0.1217 0.1232 0.1226
- T2: 0.1258 0.1257 0.1250 0.1348 0.1359 0.1358
- T3: 0.0354 0.0282 0.0288 0.0850 0.0860 0.0862
- T4: 0.0006 0.0006 0.0006 0.0019 0.0018 0.0019
-
+ T1: 0.1029 0.1005 0.0998 0.1003 0.0998 0.1002
+ T2: 0.1035 0.1013 0.1015 0.1090 0.1089 0.1090
+ T3: 0.0276 0.0270 0.0273 0.0679 0.0673 0.0673
+ T4: 0.0004 0.0004 0.0004 0.0013 0.0013 0.0013
cET: -- S- U- -A SA UA
- T1: 0.0417 0.0409 0.0403 0.0410 0.0410 0.0415
- T2: 0.0413 0.0414 0.0413 0.0417 0.0411 0.0417
- T3: 0.0097 0.0100 0.0099 0.0187 0.0142 0.0146
+ T1: 0.0277 0.0273 0.0273 0.0272 0.0278 0.0275
+ T2: 0.0281 0.0347 0.0281 0.0285 0.0284 0.0284
+ T3: 0.0074 0.0074 0.0074 0.0122 0.0102 0.0101
T4: 0.0001 0.0001 0.0001 0.0001 0.0001 0.0001
-
ET : -- S- U- -A SA UA
- T1: 0.2189 0.2832 0.2210 0.2646 0.2905 0.2214
- T2: 0.3022 0.2322 0.2868 0.3192 0.2290 0.3075
- T3: 0.0519 0.0553 0.0527 0.0601 0.0572 0.0911
- T4: 0.0009 0.0008 0.0008 0.0008 0.0009 0.0009
+ T1: 0.1349 0.1962 0.2356 0.1288 0.2642 0.1351
+ T2: 0.3104 0.1344 0.3566 0.3857 0.1354 0.4677
+ T3: 0.0313 0.0325 0.0312 0.0356 0.3803 0.0364
+ T4: 0.0005 0.0005 0.0008 0.0006 0.0007 0.0006
While lxml is still faster than ET in most cases (30-60%), cET can be up to
three times faster than lxml here. One of the reasons is that lxml must
@@ -141,29 +157,29 @@
Where ET and cET can quickly create a shallow copy of their list of children,
lxml has to create a Python object for each child and collect them in a list::
- lxe: root_getchildren (-- T2 ) 6.3981 msec/pass
- cET: root_getchildren (-- T2 ) 0.0651 msec/pass
- ET : root_getchildren (-- T2 ) 0.0224 msec/pass
+ lxe: root_getchildren (--TR T2) 0.3500 msec/pass
+ cET: root_getchildren (--TR T2) 0.0150 msec/pass
+ ET : root_getchildren (--TR T2) 0.0091 msec/pass
As opposed to ET, libxml2 has a notion of documents that each element must be
in. This results in a major performance difference for creating independent
Elements that end up in independently created documents::
- lxe: create_elements (-- T2 ) 22.0083 msec/pass
- cET: create_elements (-- T2 ) 0.3920 msec/pass
- ET : create_elements (-- T2 ) 3.0865 msec/pass
+ lxe: create_elements (--TC T2) 3.7301 msec/pass
+ cET: create_elements (--TC T2) 0.1960 msec/pass
+ ET : create_elements (--TC T2) 1.4279 msec/pass
Therefore, it is always preferable to create Elements for the document they
are supposed to end up in, either as SubElements of an Element or using the
explicit ``Element.makeelement()`` call::
- lxe: makeelement (-- T2 ) 4.2658 msec/pass
- cET: makeelement (-- T2 ) 0.5658 msec/pass
- ET : makeelement (-- T2 ) 3.7136 msec/pass
-
- lxe: create_subelements (-- T2 ) 3.7640 msec/pass
- cET: create_subelements (-- T2 ) 0.5332 msec/pass
- ET : create_subelements (-- T2 ) 6.5937 msec/pass
+ lxe: makeelement (--TC T2) 2.5990 msec/pass
+ cET: makeelement (--TC T2) 0.3128 msec/pass
+ ET : makeelement (--TC T2) 1.6940 msec/pass
+
+ lxe: create_subelements (--TC T2) 2.3072 msec/pass
+ cET: create_subelements (--TC T2) 0.2370 msec/pass
+ ET : create_subelements (--TC T2) 3.2189 msec/pass
So, if the main performance bottleneck of an application is creating large XML
trees in memory through calls to Element and SubElement, cET is the best
@@ -176,13 +192,13 @@
The following benchmark appends all root children of the second tree to the
root of the first tree::
- lxe: append_from_document (-- T1,T2) 11.7905 msec/pass
- cET: append_from_document (-- T1,T2) 0.4673 msec/pass
- ET : append_from_document (-- T1,T2) 2.0460 msec/pass
-
- lxe: append_from_document (-- T3,T4) 0.1582 msec/pass
- cET: append_from_document (-- T3,T4) 0.0224 msec/pass
- ET : append_from_document (-- T3,T4) 0.1618 msec/pass
+ lxe: append_from_document (--TR T1,T2) 4.3468 msec/pass
+ cET: append_from_document (--TR T1,T2) 0.2608 msec/pass
+ ET : append_from_document (--TR T1,T2) 1.2310 msec/pass
+
+ lxe: append_from_document (--TR T3,T4) 0.0679 msec/pass
+ cET: append_from_document (--TR T3,T4) 0.0148 msec/pass
+ ET : append_from_document (--TR T3,T4) 0.0880 msec/pass
Although these are fairly small numbers compared to parsing, this easily shows
the different performance classes for lxml and (c)ET. Where the latter do not
@@ -193,26 +209,26 @@
This difference is not always as visible, but applies to most parts of the
API, like inserting newly created elements::
- lxe: insert_from_document (-- T1,T2) 16.2342 msec/pass
- cET: insert_from_document (-- T1,T2) 1.1786 msec/pass
- ET : insert_from_document (-- T1,T2) 3.6107 msec/pass
+ lxe: insert_from_document (--TR T1,T2) 6.3150 msec/pass
+ cET: insert_from_document (--TR T1,T2) 0.4039 msec/pass
+ ET : insert_from_document (--TR T1,T2) 1.4770 msec/pass
Or replacing the child slice by a new element::
- lxe: replace_children_element (-- T1 ) 9.1834 msec/pass
- cET: replace_children_element (-- T1 ) 0.9731 msec/pass
- ET : replace_children_element (-- T1 ) 14.8213 msec/pass
+ lxe: replace_children_element (--TC T1) 0.2608 msec/pass
+ cET: replace_children_element (--TC T1) 0.0238 msec/pass
+ ET : replace_children_element (--TC T1) 0.1628 msec/pass
You should keep this difference in mind when you merge very large trees. On
the other hand, deep copying a tree is fast in lxml::
- lxe: deepcopy (-- T1 ) 24.7359 msec/pass
- cET: deepcopy (-- T1 ) 450.5479 msec/pass
- ET : deepcopy (-- T1 ) 717.8308 msec/pass
-
- lxe: deepcopy (-- T3 ) 2.1182 msec/pass
- cET: deepcopy (-- T3 ) 107.2124 msec/pass
- ET : deepcopy (-- T3 ) 173.9782 msec/pass
+ lxe: deepcopy (--TC T1) 10.6010 msec/pass
+ cET: deepcopy (--TC T1) 220.2251 msec/pass
+ ET : deepcopy (--TC T1) 463.7730 msec/pass
+
+ lxe: deepcopy (--TC T3) 8.2979 msec/pass
+ cET: deepcopy (--TC T3) 53.8740 msec/pass
+ ET : deepcopy (--TC T3) 118.2799 msec/pass
So, for example, if you often need to create independent subtrees from a large
tree that you have parsed in, lxml is by far the best choice here.
@@ -226,39 +242,39 @@
especially if few elements are of interest or the element tag name is known,
lxml is a good choice::
- lxe: getiterator_all (-- T2 ) 22.5847 msec/pass
- cET: getiterator_all (-- T2 ) 36.8212 msec/pass
- ET : getiterator_all (-- T2 ) 46.2846 msec/pass
-
- lxe: getiterator_islice (-- T2 ) 2.0421 msec/pass
- cET: getiterator_islice (-- T2 ) 0.3343 msec/pass
- ET : getiterator_islice (-- T2 ) 44.5898 msec/pass
-
- lxe: getiterator_tag (-- T2 ) 1.9593 msec/pass
- cET: getiterator_tag (-- T2 ) 11.7767 msec/pass
- ET : getiterator_tag (-- T2 ) 37.5661 msec/pass
-
- lxe: getiterator_tag_all (-- T2 ) 4.5667 msec/pass
- cET: getiterator_tag_all (-- T2 ) 33.5681 msec/pass
- ET : getiterator_tag_all (-- T2 ) 37.6200 msec/pass
+ lxe: getiterator_all (--TR T2) 10.3800 msec/pass
+ cET: getiterator_all (--TR T2) 28.2831 msec/pass
+ ET : getiterator_all (--TR T2) 26.0720 msec/pass
+
+ lxe: getiterator_islice (--TR T2) 0.1140 msec/pass
+ cET: getiterator_islice (--TR T2) 0.2460 msec/pass
+ ET : getiterator_islice (--TR T2) 26.6550 msec/pass
+
+ lxe: getiterator_tag (--TR T2) 0.3879 msec/pass
+ cET: getiterator_tag (--TR T2) 9.3720 msec/pass
+ ET : getiterator_tag (--TR T2) 22.8221 msec/pass
+
+ lxe: getiterator_tag_all (--TR T2) 0.8819 msec/pass
+ cET: getiterator_tag_all (--TR T2) 27.2939 msec/pass
+ ET : getiterator_tag_all (--TR T2) 22.8271 msec/pass
This similarly shows in ``Element.findall()``::
- lxe: findall (-- T2 ) 26.9907 msec/pass
- cET: findall (-- T2 ) 39.1728 msec/pass
- ET : findall (-- T2 ) 50.9692 msec/pass
-
- lxe: findall (-- T3 ) 3.6452 msec/pass
- cET: findall (-- T3 ) 12.0210 msec/pass
- ET : findall (-- T3 ) 11.2570 msec/pass
-
- lxe: findall_tag (-- T2 ) 4.6065 msec/pass
- cET: findall_tag (-- T2 ) 34.0267 msec/pass
- ET : findall_tag (-- T2 ) 36.7813 msec/pass
-
- lxe: findall_tag (-- T3 ) 0.5884 msec/pass
- cET: findall_tag (-- T3 ) 7.6307 msec/pass
- ET : findall_tag (-- T3 ) 9.2943 msec/pass
+ lxe: findall (--TR T2) 10.9370 msec/pass
+ cET: findall (--TR T2) 28.8639 msec/pass
+ ET : findall (--TR T2) 27.1060 msec/pass
+
+ lxe: findall (--TR T3) 2.1989 msec/pass
+ cET: findall (--TR T3) 8.9881 msec/pass
+ ET : findall (--TR T3) 6.4890 msec/pass
+
+ lxe: findall_tag (--TR T2) 0.9520 msec/pass
+ cET: findall_tag (--TR T2) 27.2651 msec/pass
+ ET : findall_tag (--TR T2) 22.7208 msec/pass
+
+ lxe: findall_tag (--TR T3) 0.1700 msec/pass
+ cET: findall_tag (--TR T3) 6.4540 msec/pass
+ ET : findall_tag (--TR T3) 5.4770 msec/pass
Note that all three libraries currently use the same Python implementation for
``findall()``, except for their native tree iterator.
@@ -267,48 +283,52 @@
XPath
-----
+The following timings are based on the benchmark script `bench_xpath.py`_.
+
This part of lxml does not have an equivalent in ElementTree. However, lxml
provides more than one way of accessing it and you should take care which part
of the lxml API you use. The most straightforward way is to call the
``xpath()`` method on an Element or ElementTree::
- lxe: xpath_method (-- T1) 9.9304 msec/pass
- lxe: xpath_method (-- T2) 29.3595 msec/pass
- lxe: xpath_method (-- T3) 0.2791 msec/pass
- lxe: xpath_method (-- T4) 0.9906 msec/pass
+ lxe: xpath_method (--TC T1) 1.0180 msec/pass
+ lxe: xpath_method (--TC T2) 20.3521 msec/pass
+ lxe: xpath_method (--TC T3) 0.1259 msec/pass
+ lxe: xpath_method (--TC T4) 1.0169 msec/pass
This is well suited for testing and when the XPath expressions are as diverse
as the trees they are called on. However, if you have a single XPath
expression that you want to apply to a larger number of different elements,
the ``XPath`` class is the most efficient way to do it::
- lxe: xpath_class (-- T1) 4.7921 msec/pass
- lxe: xpath_class (-- T2) 9.6187 msec/pass
- lxe: xpath_class (-- T3) 0.2215 msec/pass
- lxe: xpath_class (-- T4) 0.2697 msec/pass
+ lxe: xpath_class (--TC T1) 0.1891 msec/pass
+ lxe: xpath_class (--TC T2) 3.0179 msec/pass
+ lxe: xpath_class (--TC T3) 0.0570 msec/pass
+ lxe: xpath_class (--TC T4) 0.1910 msec/pass
Note that this still allows you to use variables in the expression, so you can
parse it once and then adapt it through variables at call time. In other
cases, where you have a fixed Element or ElementTree and want to run different
expressions on it, you should consider the ``XPathEvaluator``::
- lxe: xpath_element (-- T1) 5.3826 msec/pass
- lxe: xpath_element (-- T2) 11.3929 msec/pass
- lxe: xpath_element (-- T3) 0.2514 msec/pass
- lxe: xpath_element (-- T4) 0.3038 msec/pass
+ lxe: xpath_element (--TR T1) 0.4089 msec/pass
+ lxe: xpath_element (--TR T2) 5.9960 msec/pass
+ lxe: xpath_element (--TR T3) 0.1230 msec/pass
+ lxe: xpath_element (--TR T4) 0.3440 msec/pass
While it looks slightly slower, creating an XPath object for each of the
expressions generates a much higher overhead here::
- lxe: xpath_class_repeat (-- T1) 6.8099 msec/pass
- lxe: xpath_class_repeat (-- T2) 26.7462 msec/pass
- lxe: xpath_class_repeat (-- T3) 0.3126 msec/pass
- lxe: xpath_class_repeat (-- T4) 1.1111 msec/pass
+ lxe: xpath_class_repeat (--TC T1) 1.0259 msec/pass
+ lxe: xpath_class_repeat (--TC T2) 20.4861 msec/pass
+ lxe: xpath_class_repeat (--TC T3) 0.1280 msec/pass
+ lxe: xpath_class_repeat (--TC T4) 1.0269 msec/pass
lxml.objectify
--------------
+The following timings are based on the benchmark script `bench_objectify.py`_.
+
Objectify is a data-binding API for XML based on lxml.etree, that was added in
version 1.1. It uses standard Python attribute access to traverse the XML
tree. It also features ObjectPath, a fast path language based on the same
@@ -325,21 +345,21 @@
tree. It avoids step-by-step Python element instantiations along the path,
which can substantially improve the access time::
- lxe: attribute (--T T1) 14.8621 msec/pass
- lxe: attribute (--T T2) 61.8820 msec/pass
- lxe: attribute (--T T4) 14.9317 msec/pass
-
- lxe: objectpath (--T T1) 13.7311 msec/pass
- lxe: objectpath (--T T2) 58.5930 msec/pass
- lxe: objectpath (--T T4) 8.0961 msec/pass
-
- lxe: attributes_deep (--T T1) 81.4488 msec/pass
- lxe: attributes_deep (--T T2) 77.0266 msec/pass
- lxe: attributes_deep (--T T4) 27.1226 msec/pass
-
- lxe: objectpath_deep (--T T1) 63.1915 msec/pass
- lxe: objectpath_deep (--T T2) 65.2469 msec/pass
- lxe: objectpath_deep (--T T4) 11.0138 msec/pass
+ lxe: attribute (--TR T1) 10.6189 msec/pass
+ lxe: attribute (--TR T2) 53.7431 msec/pass
+ lxe: attribute (--TR T4) 10.3359 msec/pass
+
+ lxe: objectpath (--TR T1) 5.8351 msec/pass
+ lxe: objectpath (--TR T2) 48.1579 msec/pass
+ lxe: objectpath (--TR T4) 5.6930 msec/pass
+
+ lxe: attributes_deep (--TR T1) 58.7430 msec/pass
+ lxe: attributes_deep (--TR T2) 63.0901 msec/pass
+ lxe: attributes_deep (--TR T4) 17.4620 msec/pass
+
+ lxe: objectpath_deep (--TR T1) 52.1719 msec/pass
+ lxe: objectpath_deep (--TR T2) 52.9201 msec/pass
+ lxe: objectpath_deep (--TR T4) 7.5650 msec/pass
Note, however, that parsing ObjectPath expressions is not free either, so
this is most effective for frequently accessing the same element.
@@ -361,13 +381,17 @@
subtrees and elements) to cache, you can trade memory usage against access
speed::
- lxe: attribute_cached (--T T1) 10.8343 msec/pass
- lxe: attribute_cached (--T T2) 55.5890 msec/pass
- lxe: attribute_cached (--T T4) 10.9514 msec/pass
-
- lxe: attributes_deep_cached (--T T1) 63.7080 msec/pass
- lxe: attributes_deep_cached (--T T2) 65.6838 msec/pass
- lxe: attributes_deep_cached (--T T4) 15.4514 msec/pass
+ lxe: attribute_cached (--TR T1) 7.9739 msec/pass
+ lxe: attribute_cached (--TR T2) 50.9331 msec/pass
+ lxe: attribute_cached (--TR T4) 7.8540 msec/pass
+
+ lxe: attributes_deep_cached (--TR T1) 51.1391 msec/pass
+ lxe: attributes_deep_cached (--TR T2) 55.7129 msec/pass
+ lxe: attributes_deep_cached (--TR T4) 10.7968 msec/pass
+
+ lxe: objectpath_deep_cached (--TR T1) 47.6151 msec/pass
+ lxe: objectpath_deep_cached (--TR T2) 48.0802 msec/pass
+ lxe: objectpath_deep_cached (--TR T4) 4.0281 msec/pass
Things to note: you cannot currently use ``weakref.WeakKeyDictionary`` objects
for this as lxml's element objects do not support weak references (which are
Modified: lxml/branch/lxml-1.3/src/lxml/etree.pyx
==============================================================================
--- lxml/branch/lxml-1.3/src/lxml/etree.pyx (original)
+++ lxml/branch/lxml-1.3/src/lxml/etree.pyx Tue Jun 12 18:28:50 2007
@@ -1406,6 +1406,20 @@
for key, value in sequence_or_dict:
_setAttributeValue(self._element, key, value)
+ def pop(self, key, *default):
+ if python.PyTuple_GET_SIZE(default) > 1:
+ raise TypeError, "pop expected at most 2 arguments, got %d" % \
+ (python.PyTuple_GET_SIZE(default)+1)
+ result = _getAttributeValue(self._element, key, None)
+ if result is None:
+ if python.PyTuple_GET_SIZE(default) == 0:
+ raise KeyError, key
+ else:
+ return python.PyTuple_GET_ITEM(default, 0)
+ else:
+ _delAttribute(self._element, key)
+ return result
+
# ACCESSORS
def __repr__(self):
return repr(dict( _attributeIteratorFactory(self._element, 3) ))
Modified: lxml/branch/lxml-1.3/src/lxml/tests/test_etree.py
==============================================================================
--- lxml/branch/lxml-1.3/src/lxml/tests/test_etree.py (original)
+++ lxml/branch/lxml-1.3/src/lxml/tests/test_etree.py Tue Jun 12 18:28:50 2007
@@ -70,6 +70,39 @@
self.assertEquals("TEST", root.get("attr"))
self.assertRaises(TypeError, root.set, "newattr", 5)
+ def test_attrib_pop(self):
+ ElementTree = self.etree.ElementTree
+
+ f = StringIO('<doc one="One" two="Two"/>')
+ doc = ElementTree(file=f)
+ root = doc.getroot()
+ self.assertEquals('One', root.attrib['one'])
+ self.assertEquals('Two', root.attrib['two'])
+
+ self.assertEquals('One', root.attrib.pop('one'))
+
+ self.assertEquals(None, root.attrib.get('one'))
+ self.assertEquals('Two', root.attrib['two'])
+
+ def test_attrib_pop_unknown(self):
+ root = self.etree.XML('<doc one="One" two="Two"/>')
+ self.assertRaises(KeyError, root.attrib.pop, 'NONE')
+
+ self.assertEquals('One', root.attrib['one'])
+ self.assertEquals('Two', root.attrib['two'])
+
+ def test_attrib_pop_default(self):
+ root = self.etree.XML('<doc one="One" two="Two"/>')
+ self.assertEquals('Three', root.attrib.pop('three', 'Three'))
+
+ def test_attrib_pop_empty_default(self):
+ root = self.etree.XML('<doc/>')
+ self.assertEquals('Three', root.attrib.pop('three', 'Three'))
+
+ def test_attrib_pop_invalid_args(self):
+ root = self.etree.XML('<doc one="One" two="Two"/>')
+ self.assertRaises(TypeError, root.attrib.pop, 'One', None, None)
+
def test_pi(self):
# lxml.etree separates target and text
Element = self.etree.Element
More information about the lxml-checkins
mailing list