import sys, string, time, copy, gc
from itertools import *
from StringIO import StringIO
TREE_FACTOR = 1 # increase tree size with '-l / '-L' cmd option
_TEXT = "some ASCII text" * TREE_FACTOR
_UTEXT = u"some klingon: \F8D2" * TREE_FACTOR
_ATTRIBUTES = {
'{attr}test1' : _TEXT,
'{attr}test2' : _TEXT,
'bla1' : _TEXT,
'bla2' : _TEXT,
'bla3' : _TEXT
}
def with_attributes(*use_attributes):
"Decorator for benchmarks that use attributes"
vmap = {False : 0, True : 1}
values = [ vmap[bool(v)] for v in use_attributes ]
def set_value(function):
try:
function.ATTRIBUTES.update(values)
except AttributeError:
function.ATTRIBUTES = set(values)
return function
return set_value
def with_text(no_text=False, text=False, utext=False):
"Decorator for benchmarks that use text"
values = []
if no_text:
values.append(0)
if text:
values.append(1)
if utext:
values.append(2)
def set_value(function):
try:
function.TEXT.add(values)
except AttributeError:
function.TEXT = set(values)
return function
return set_value
def onlylib(*libs):
"Decorator to restrict benchmarks to specific libraries"
def set_libs(function):
if libs:
function.LIBS = libs
return function
return set_libs
def serialized(function):
"Decorator for benchmarks that require serialized XML data"
function.STRING = True
return function
class SkippedTest(Exception):
pass
class BenchMarkBase(object):
atoz = string.ascii_lowercase
_LIB_NAME_MAP = {
'etree' : 'lxe',
'ElementTree' : 'ET',
'cElementTree' : 'cET'
}
SEARCH_TAG = "{cdefg}a00001"
def __init__(self, etree):
self.etree = etree
libname = etree.__name__.split('.')[-1]
self.lib_name = self._LIB_NAME_MAP.get(libname, libname)
if libname == 'etree':
deepcopy = copy.deepcopy
def set_property(root, fname):
setattr(self, fname, lambda : deepcopy(root))
xml = self._serialize_tree(root)
setattr(self, fname + '_xml', lambda : xml)
else:
def set_property(root, fname):
setattr(self, fname, self.et_make_clone_factory(root))
xml = self._serialize_tree(root)
setattr(self, fname + '_xml', lambda : xml)
attribute_list = list(izip(count(), ({}, _ATTRIBUTES)))
text_list = list(izip(count(), (None, _TEXT, _UTEXT)))
build_name = self._tree_builder_name
self.setup_times = []
for tree in self._all_trees():
times = []
self.setup_times.append(times)
setup = getattr(self, '_setup_tree%d' % tree)
for an, attributes in attribute_list:
for tn, text in text_list:
root, t = setup(text, attributes)
times.append(t)
set_property(root, build_name(tree, tn, an))
def _tree_builder_name(self, tree, tn, an):
return '_root%d_T%d_A%d' % (tree, tn, an)
def tree_builder(self, tree, tn, an, serial):
name = self._tree_builder_name(tree, tn, an)
if serial:
name += '_xml'
return getattr(self, name)
def _serialize_tree(self, root):
return self.etree.tostring(root, 'UTF-8')
def et_make_clone_factory(self, elem):
def generate_elem(append, elem, level):
var = "e" + str(level)
arg = repr(elem.tag)
if elem.attrib:
arg += ", **%r" % elem.attrib
if level == 1:
append(" e1 = Element(%s)" % arg)
else:
append(" %s = SubElement(e%d, %s)" % (var, level-1, arg))
if elem.text:
append(" %s.text = %r" % (var, elem.text))
if elem.tail:
append(" %s.tail = %r" % (var, elem.tail))
for e in elem:
generate_elem(append, e, level+1)
# generate code for a function that creates a tree
output = ["def element_factory():"]
generate_elem(output.append, elem, 1)
output.append(" return e1")
# setup global function namespace
namespace = {
"Element" : self.etree.Element,
"SubElement" : self.etree.SubElement
}
# create function object
exec "\n".join(output) in namespace
return namespace["element_factory"]
def _all_trees(self):
all_trees = []
for name in dir(self):
if name.startswith('_setup_tree'):
all_trees.append(int(name[11:]))
return all_trees
def _setup_tree1(self, text, attributes):
"tree with 26 2nd level and 520 * TREE_FACTOR 3rd level children"
atoz = self.atoz
SubElement = self.etree.SubElement
current_time = time.time
t = current_time()
root = self.etree.Element('{abc}rootnode')
for ch1 in atoz:
el = SubElement(root, "{bcd}"+ch1*5, attributes)
el.text = text
for ch2 in atoz:
for i in range(20 * TREE_FACTOR):
SubElement(el, "{cdefg}%s%05d" % (ch2, i))
t = current_time() - t
return (root, t)
def _setup_tree2(self, text, attributes):
"tree with 520 * TREE_FACTOR 2nd level and 26 3rd level children"
atoz = self.atoz
SubElement = self.etree.SubElement
current_time = time.time
t = current_time()
root = self.etree.Element('{abc}rootnode')
for ch1 in atoz:
for i in range(20 * TREE_FACTOR):
el = SubElement(root, "{bcd}"+ch1*5, attributes)
el.text = text
for ch2 in atoz:
SubElement(el, "{cdefg}%s%05d" % (ch2, i))
t = current_time() - t
return (root, t)
def _setup_tree3(self, text, attributes):
"tree of depth 8 + TREE_FACTOR with 3 children per node"
SubElement = self.etree.SubElement
current_time = time.time
t = current_time()
root = self.etree.Element('{abc}rootnode')
children = [root]
for i in range(6 + TREE_FACTOR):
tag_no = count().next
children = [ SubElement(c, "{cdefg}a%05d" % i, attributes)
for i,c in enumerate(chain(children, children, children)) ]
for child in root:
child.text = text
t = current_time() - t
return (root, t)
def _setup_tree4(self, text, attributes):
"small tree with 26 2nd level and 2 3rd level children"
SubElement = self.etree.SubElement
current_time = time.time
t = current_time()
root = self.etree.Element('{abc}rootnode')
children = [root]
for ch1 in self.atoz:
el = SubElement(root, "{bcd}"+ch1*5, attributes)
el.text = text
SubElement(el, "{cdefg}a00001", attributes)
SubElement(el, "{cdefg}a00002", attributes)
t = current_time() - t
return (root, t)
def benchmarks(self):
"""Returns a list of all benchmarks.
A benchmark is a tuple containing a method name and a list of tree
numbers. Trees are prepared by the setup function.
"""
all_trees = self._all_trees()
benchmarks = []
for name in dir(self):
if not name.startswith('bench_'):
continue
method = getattr(self, name)
if hasattr(method, 'LIBS') and self.lib_name not in method.LIBS:
method_call = None
else:
method_call = method
if method.__doc__:
tree_sets = method.__doc__.split()
else:
tree_sets = ()
if tree_sets:
tree_tuples = [ map(int, tree_set.split(','))
for tree_set in tree_sets ]
else:
try:
function = getattr(method, 'im_func', method)
arg_count = method.func_code.co_argcount - 1
except AttributeError:
arg_count = 1
tree_tuples = self._permutations(all_trees, arg_count)
serialized = getattr(method, 'STRING', False)
for tree_tuple in tree_tuples:
for tn in sorted(getattr(method, 'TEXT', (0,))):
for an in sorted(getattr(method, 'ATTRIBUTES', (0,))):
benchmarks.append((name, method_call, tree_tuple,
tn, an, serialized))
return benchmarks
def _permutations(self, seq, count):
def _permutations(prefix, remainder, count):
if count == 0:
return [ prefix[:] ]
count -= 1
perms = []
prefix.append(None)
for pos, el in enumerate(remainder):
new_remainder = remainder[:pos] + remainder[pos+1:]
prefix[-1] = el
perms.extend( _permutations(prefix, new_remainder, count) )
prefix.pop()
return perms
return _permutations([], seq, count)
############################################################
# Benchmarks
############################################################
class BenchMark(BenchMarkBase):
def bench_iter_children(self, root):
for child in root:
pass
def bench_iter_children_reversed(self, root):
for child in reversed(root):
pass
@with_attributes(True, False)
@with_text(text=True, utext=True)
def bench_tostring_utf8(self, root):
self.etree.tostring(root, 'UTF-8')
@with_attributes(True, False)
@with_text(text=True, utext=True)
def bench_tostring_utf16(self, root):
self.etree.tostring(root, 'UTF-16')
@with_attributes(True, False)
@with_text(text=True, utext=True)
def bench_tostring_utf8_unicode_XML(self, root):
xml = unicode(self.etree.tostring(root, 'UTF-8'), 'UTF-8')
self.etree.XML(xml)
@with_attributes(True, False)
@with_text(text=True, utext=True)
def bench_write_utf8_parse_stringIO(self, root):
f = StringIO()
self.etree.ElementTree(root).write(f, 'UTF-8')
f.seek(0)
self.etree.parse(f)
@with_attributes(True, False)
@with_text(text=True, utext=True)
@serialized
def bench_parse_stringIO(self, root_xml):
f = StringIO(root_xml)
self.etree.parse(f)
@with_attributes(True, False)
@with_text(text=True, utext=True)
@serialized
def bench_XML(self, root_xml):
self.etree.XML(root_xml)
@with_attributes(True, False)
@with_text(text=True, utext=True)
@serialized
def bench_iterparse_stringIO(self, root_xml):
f = StringIO(root_xml)
for event, element in self.etree.iterparse(f):
pass
@with_attributes(True, False)
@with_text(text=True, utext=True)
@serialized
def bench_iterparse_stringIO_clear(self, root_xml):
f = StringIO(root_xml)
for event, element in self.etree.iterparse(f):
element.clear()
def bench_append_from_document(self, root1, root2):
# == "1,2 2,3 1,3 3,1 3,2 2,1" # trees 1 and 2, or 2 and 3, or ...
for el in root2:
root1.append(el)
def bench_insert_from_document(self, root1, root2):
for el in root2:
root1.insert(len(root1)/2, el)
def bench_rotate_children(self, root):
# == "1 2 3" # runs on any single tree independently
for i in range(100):
el = root[0]
del root[0]
root.append(el)
def bench_reorder(self, root):
for i in range(1,len(root)/2):
el = root[0]
del root[0]
root[-i:-i] = [ el ]
def bench_reorder_slice(self, root):
for i in range(1,len(root)/2):
els = root[0:1]
del root[0]
root[-i:-i] = els
def bench_clear(self, root):
root.clear()
def bench_has_children(self, root):
for child in root:
if child and child and child and child and child:
pass
def bench_len(self, root):
for child in root:
map(len, repeat(child, 20))
def bench_create_subelements(self, root):
SubElement = self.etree.SubElement
for child in root:
SubElement(child, '{test}test')
def bench_append_elements(self, root):
Element = self.etree.Element
for child in root:
el = Element('{test}test')
child.append(el)
def bench_makeelement(self, root):
empty_attrib = {}
for child in root:
child.makeelement('{test}test', empty_attrib)
def bench_create_elements(self, root):
Element = self.etree.Element
for child in root:
Element('{test}test')
def bench_replace_children_element(self, root):
Element = self.etree.Element
for child in root:
el = Element('{test}test')
child[:] = [el]
def bench_replace_children(self, root):
Element = self.etree.Element
for child in root:
child[:] = [ child[0] ]
def bench_remove_children(self, root):
for child in root:
root.remove(child)
def bench_remove_children_reversed(self, root):
for child in reversed(root[:]):
root.remove(child)
def bench_set_attributes(self, root):
for child in root:
child.set('a', 'bla')
@with_attributes(True)
def bench_get_attributes(self, root):
for child in root:
child.get('bla1')
child.get('{attr}test1')
def bench_setget_attributes(self, root):
for child in root:
child.set('a', 'bla')
for child in root:
child.get('a')
def bench_root_getchildren(self, root):
root.getchildren()
def bench_getchildren(self, root):
for child in root:
child.getchildren()
def bench_get_children_slice(self, root):
for child in root:
child[:]
def bench_get_children_slice_2x(self, root):
for child in root:
children = child[:]
child[:]
def bench_deepcopy(self, root):
for child in root:
copy.deepcopy(child)
def bench_deepcopy_all(self, root):
copy.deepcopy(root)
def bench_tag(self, root):
for child in root:
child.tag
def bench_tag_repeat(self, root):
for child in root:
for i in repeat(0, 100):
child.tag
@with_text(utext=True, text=True, no_text=True)
def bench_text(self, root):
for child in root:
child.text
@with_text(utext=True, text=True, no_text=True)
def bench_text_repeat(self, root):
repeat = range(500)
for child in root:
for i in repeat:
child.text
def bench_set_text(self, root):
text = _TEXT
for child in root:
child.text = text
def bench_set_utext(self, root):
text = _UTEXT
for child in root:
child.text = text
@onlylib('lxe')
def bench_index(self, root):
for child in root:
root.index(child)
@onlylib('lxe')
def bench_index_slice(self, root):
for child in root[5:100]:
root.index(child, 5, 100)
@onlylib('lxe')
def bench_index_slice_neg(self, root):
for child in root[-100:-5]:
root.index(child, start=-100, stop=-5)
def bench_getiterator_all(self, root):
list(root.getiterator())
def bench_getiterator_islice(self, root):
list(islice(root.getiterator(), 10, 110))
def bench_getiterator_tag(self, root):
list(islice(root.getiterator(self.SEARCH_TAG), 3, 10))
def bench_getiterator_tag_all(self, root):
list(root.getiterator(self.SEARCH_TAG))
def bench_getiterator_tag_text(self, root):
[ e.text for e in root.getiterator(self.SEARCH_TAG) ]
def bench_findall(self, root):
root.findall(".//*")
def bench_findall_tag(self, root):
root.findall(".//" + self.SEARCH_TAG)
@onlylib('lxe')
def bench_xpath_class(self, root):
xpath = self.etree.XPath("./*[0]")
for child in root:
xpath(child)
@onlylib('lxe')
def bench_xpath_class_repeat(self, root):
for child in root:
xpath = self.etree.XPath("./*[0]")
xpath(child)
@onlylib('lxe')
def bench_xpath_element(self, root):
xpath = self.etree.XPathElementEvaluator(root)
for child in root:
xpath.evaluate("./*[0]")
@onlylib('lxe')
def bench_xpath_method(self, root):
for child in root:
child.xpath("./*[0]")
@onlylib('lxe')
def bench_xpath_extensions_old(self, root):
def return_child(_, element):
if element:
return element[0]
else:
return ()
extensions = {(None, 'child') : return_child}
xpath = self.etree.XPath("child(.)", extensions=extensions)
for child in root:
xpath(child)
@onlylib('lxe')
def bench_xslt_extensions_old(self, root):
tree = self.etree.XML("""\
TEST
""")
def return_child(_, elements):
return elements[0][0]
extensions = {('testns', 'child') : return_child}
transform = self.etree.XSLT(tree, extensions)
for i in range(10):
transform(root)
@onlylib('lxe')
def bench_xslt_document(self, root):
transform = self.etree.XSLT(self.etree.XML("""\
TEST
"""))
transform(root)
############################################################
# Main program
############################################################
if __name__ == '__main__':
import_lxml = True
callgrind_zero = False
if len(sys.argv) > 1:
try:
sys.argv.remove('-i')
# run benchmark 'inplace'
sys.path.insert(0, 'src')
except ValueError:
pass
try:
sys.argv.remove('-nolxml')
# run without lxml
import_lxml = False
except ValueError:
pass
try:
sys.argv.remove('-z')
# reset callgrind after tree setup
callgrind_zero = True
except ValueError:
pass
try:
sys.argv.remove('-l')
# use large trees
TREE_FACTOR *= 2
except ValueError:
pass
try:
sys.argv.remove('-L')
# use LARGE trees
TREE_FACTOR *= 2
except ValueError:
pass
_etrees = []
if import_lxml:
from lxml import etree
_etrees.append(etree)
if len(sys.argv) > 1:
if '-a' in sys.argv or '-c' in sys.argv:
# 'all' or 'C-implementations' ?
try:
import cElementTree as cET
_etrees.append(cET)
except ImportError:
pass
try:
# 'all' ?
sys.argv.remove('-a')
from elementtree import ElementTree as ET
_etrees.append(ET)
except (ValueError, ImportError):
pass
if not _etrees:
print "No library to test. Exiting."
sys.exit(1)
print "Preparing test suites and trees ..."
benchmark_suites = map(BenchMark, _etrees)
# sorted by name and tree tuple
benchmarks = [ sorted(b.benchmarks()) for b in benchmark_suites ]
if len(sys.argv) > 1:
selected = []
for name in sys.argv[1:]:
selected.append(name)
benchmarks = [ [ b for b in bs
if [ match for match in selected
if match in b[0] ] ]
for bs in benchmarks ]
import time
def run_bench(suite, method_name, method_call, tree_set, tn, an, serial):
if method_call is None:
raise SkippedTest
current_time = time.time
call_repeat = range(10)
tree_builders = [ suite.tree_builder(tree, tn, an, serial)
for tree in tree_set ]
times = []
args = ()
for i in range(3):
gc.collect()
gc.disable()
t = 0
for i in call_repeat:
args = [ build() for build in tree_builders ]
t_one_call = current_time()
method_call(*args)
t += current_time() - t_one_call
t = 1000.0 * t / len(call_repeat)
times.append(t)
gc.enable()
del args
return times
def build_treeset_name(trees, tn, an, serialized):
text = {0:'-', 1:'S', 2:'U'}[tn]
attr = {0:'-', 1:'A'}[an]
ser = {True:'X', False:'T'}[serialized]
return "%s%s%s T%s" % (text, attr, ser, ',T'.join(imap(str, trees))[:6])
print "Running benchmark on", ', '.join(b.lib_name
for b in benchmark_suites)
print
print "Setup times for trees in seconds:"
for b in benchmark_suites:
print "%-3s: " % b.lib_name,
for an in (0,1):
for tn in (0,1,2):
print ' %s ' % build_treeset_name((), tn, an, False)[:2],
print
for i, tree_times in enumerate(b.setup_times):
print " T%d:" % (i+1), ' '.join("%6.4f" % t for t in tree_times)
print
if callgrind_zero:
cmd = open("callgrind.cmd", 'w')
cmd.write('Zero\n')
cmd.close()
for bench_calls in izip(*benchmarks):
for lib, (bench, benchmark_setup) in enumerate(izip(benchmark_suites, bench_calls)):
bench_name = benchmark_setup[0]
tree_set_name = build_treeset_name(*benchmark_setup[-4:])
print "%-3s: %-28s" % (bench.lib_name, bench_name[6:34]),
print "(%-10s)" % tree_set_name,
sys.stdout.flush()
try:
result = run_bench(bench, *benchmark_setup)
except SkippedTest:
print "skipped"
except KeyboardInterrupt:
print "interrupted by user"
sys.exit(1)
except Exception, e:
print "failed: %s: %s" % (e.__class__.__name__, e)
else:
print "%9.4f msec/pass, best of (" % min(result),
for t in result:
print "%9.4f" % t,
print ")"
if len(benchmark_suites) > 1:
print # empty line between different benchmarks