[Lxml-checkins] r54103 - in lxml/trunk: . benchmark
scoder at codespeak.net
scoder at codespeak.net
Thu Apr 24 22:04:10 CEST 2008
Author: scoder
Date: Thu Apr 24 22:04:08 2008
New Revision: 54103
Modified:
lxml/trunk/ (props changed)
lxml/trunk/benchmark/bench_etree.py
lxml/trunk/benchmark/bench_objectify.py
lxml/trunk/benchmark/bench_xpath.py
lxml/trunk/benchmark/benchbase.py
Log:
r4054 at delle: sbehnel | 2008-04-24 07:57:21 +0200
faster benchmark runs: avoid rebuilding trees when the benchmark does not change them
Modified: lxml/trunk/benchmark/bench_etree.py
==============================================================================
--- lxml/trunk/benchmark/bench_etree.py (original)
+++ lxml/trunk/benchmark/bench_etree.py Thu Apr 24 22:04:08 2008
@@ -3,7 +3,8 @@
from StringIO import StringIO
import benchbase
-from benchbase import with_attributes, with_text, onlylib, serialized, children
+from benchbase import (with_attributes, with_text, onlylib,
+ serialized, children, nochange)
TEXT = "some ASCII text"
UTEXT = u"some klingon: \F8D2"
@@ -13,39 +14,47 @@
############################################################
class BenchMark(benchbase.TreeBenchMark):
+ @nochange
def bench_iter_children(self, root):
for child in root:
pass
+ @nochange
def bench_iter_children_reversed(self, root):
for child in reversed(root):
pass
+ @nochange
def bench_first_child(self, root):
- for i in range(1000):
+ for i in self.repeat1000:
child = root[0]
+ @nochange
def bench_last_child(self, root):
- for i in range(1000):
+ for i in self.repeat1000:
child = root[-1]
+ @nochange
def bench_middle_child(self, root):
pos = len(root) / 2
- for i in range(1000):
+ for i in self.repeat1000:
child = root[pos]
+ @nochange
@with_attributes(False)
@with_text(text=True)
@onlylib('lxe', 'ET')
def bench_tostring_text_ascii(self, root):
self.etree.tostring(root, method="text")
+ @nochange
@with_attributes(False)
@with_text(text=True, utext=True)
@onlylib('lxe')
def bench_tostring_text_utf16(self, root):
self.etree.tostring(root, method="text", encoding='UTF-16')
+ @nochange
@with_attributes(False)
@with_text(text=True, utext=True)
@onlylib('lxe', 'ET')
@@ -55,6 +64,7 @@
self.etree.tostring(child, method="text",
encoding='UTF-8', with_tail=True)
+ @nochange
@with_attributes(False)
@with_text(text=True, utext=True)
@onlylib('lxe')
@@ -63,22 +73,26 @@
for child in children:
self.etree.tostring(child, method="text", encoding=unicode)
+ @nochange
@with_attributes(True, False)
@with_text(text=True, utext=True)
def bench_tostring_utf8(self, root):
self.etree.tostring(root, encoding='UTF-8')
+ @nochange
@with_attributes(True, False)
@with_text(text=True, utext=True)
def bench_tostring_utf16(self, root):
self.etree.tostring(root, encoding='UTF-16')
+ @nochange
@with_attributes(True, False)
@with_text(text=True, utext=True)
def bench_tostring_utf8_unicode_XML(self, root):
xml = unicode(self.etree.tostring(root, encoding='UTF-8'), 'UTF-8')
self.etree.XML(xml)
+ @nochange
@with_attributes(True, False)
@with_text(text=True, utext=True)
def bench_write_utf8_parse_stringIO(self, root):
@@ -149,12 +163,14 @@
def bench_clear(self, root):
root.clear()
+ @nochange
@children
def bench_has_children(self, children):
for child in children:
if child and child and child and child and child:
pass
+ @nochange
@children
def bench_len(self, children):
for child in children:
@@ -172,12 +188,14 @@
el = Element('{test}test')
child.append(el)
+ @nochange
@children
def bench_makeelement(self, children):
empty_attrib = {}
for child in children:
child.makeelement('{test}test', empty_attrib)
+ @nochange
@children
def bench_create_elements(self, children):
Element = self.etree.Element
@@ -224,28 +242,34 @@
for child in children:
child.get('a')
+ @nochange
def bench_root_getchildren(self, root):
root.getchildren()
+ @nochange
def bench_root_list_children(self, root):
list(root)
+ @nochange
@children
def bench_getchildren(self, children):
for child in children:
child.getchildren()
+ @nochange
@children
def bench_get_children_slice(self, children):
for child in children:
child[:]
+ @nochange
@children
def bench_get_children_slice_2x(self, children):
for child in children:
child[:]
child[:]
+ @nochange
@children
@with_attributes(True, False)
@with_text(utext=True, text=True, no_text=True)
@@ -253,34 +277,38 @@
for child in children:
copy.deepcopy(child)
+ @nochange
@with_attributes(True, False)
@with_text(utext=True, text=True, no_text=True)
def bench_deepcopy_all(self, root):
copy.deepcopy(root)
+ @nochange
@children
def bench_tag(self, children):
for child in children:
child.tag
+ @nochange
@children
def bench_tag_repeat(self, children):
for child in children:
- for i in repeat(0, 100):
+ for i in self.repeat100:
child.tag
+ @nochange
@with_text(utext=True, text=True, no_text=True)
@children
def bench_text(self, children):
for child in children:
child.text
+ @nochange
@with_text(utext=True, text=True, no_text=True)
@children
def bench_text_repeat(self, children):
- repeat = range(500)
for child in children:
- for i in repeat:
+ for i in self.repeat500:
child.text
@children
@@ -295,65 +323,82 @@
for child in children:
child.text = text
+ @nochange
@onlylib('lxe')
def bench_index(self, root):
for child in root:
root.index(child)
+ @nochange
@onlylib('lxe')
def bench_index_slice(self, root):
for child in root[5:100]:
root.index(child, 5, 100)
+ @nochange
@onlylib('lxe')
def bench_index_slice_neg(self, root):
for child in root[-100:-5]:
root.index(child, start=-100, stop=-5)
+ @nochange
def bench_getiterator_all(self, root):
list(root.getiterator())
+ @nochange
def bench_getiterator_islice(self, root):
list(islice(root.getiterator(), 10, 110))
+ @nochange
def bench_getiterator_tag(self, root):
list(islice(root.getiterator(self.SEARCH_TAG), 3, 10))
+ @nochange
def bench_getiterator_tag_all(self, root):
list(root.getiterator(self.SEARCH_TAG))
+ @nochange
def bench_getiterator_tag_none(self, root):
list(root.getiterator("{ThisShould}NeverExist"))
+ @nochange
def bench_getiterator_tag_text(self, root):
[ e.text for e in root.getiterator(self.SEARCH_TAG) ]
+ @nochange
def bench_findall(self, root):
root.findall(".//*")
+ @nochange
def bench_findall_child(self, root):
root.findall(".//*/" + self.SEARCH_TAG)
+ @nochange
def bench_findall_tag(self, root):
root.findall(".//" + self.SEARCH_TAG)
+ @nochange
def bench_findall_path(self, root):
root.findall(".//*[%s]/./%s/./*" % (self.SEARCH_TAG, self.SEARCH_TAG))
+ @nochange
@onlylib('lxe')
def bench_xpath_path(self, root):
ns, tag = self.SEARCH_TAG[1:].split('}')
root.xpath(".//*[p:%s]/./p:%s/./*" % (tag,tag),
namespaces = {'p':ns})
+ @nochange
@onlylib('lxe')
def bench_iterfind(self, root):
list(root.iterfind(".//*"))
+ @nochange
@onlylib('lxe')
def bench_iterfind_tag(self, root):
list(root.iterfind(".//" + self.SEARCH_TAG))
+ @nochange
@onlylib('lxe')
def bench_iterfind_islice(self, root):
list(islice(root.iterfind(".//*"), 10, 110))
Modified: lxml/trunk/benchmark/bench_objectify.py
==============================================================================
--- lxml/trunk/benchmark/bench_objectify.py (original)
+++ lxml/trunk/benchmark/bench_objectify.py Thu Apr 24 22:04:08 2008
@@ -3,7 +3,8 @@
from StringIO import StringIO
import benchbase
-from benchbase import with_attributes, with_text, onlylib, serialized, children
+from benchbase import (with_attributes, with_text, onlylib,
+ serialized, children, nochange)
############################################################
# Benchmarks
@@ -22,6 +23,7 @@
parser.setElementClassLookup(lookup)
super(BenchMark, self).__init__(etree, parser)
+ @nochange
def bench_attribute(self, root):
"1 2 4"
for i in self.repeat3000:
@@ -37,17 +39,20 @@
for i in self.repeat3000:
root.XYZ = "5"
+ @nochange
def bench_attribute_cached(self, root):
"1 2 4"
cache = root.zzzzz
for i in self.repeat3000:
root.zzzzz
+ @nochange
def bench_attributes_deep(self, root):
"1 2 4"
for i in self.repeat3000:
root.zzzzz['{cdefg}a00001']
+ @nochange
def bench_attributes_deep_cached(self, root):
"1 2 4"
cache1 = root.zzzzz
@@ -55,18 +60,21 @@
for i in self.repeat3000:
root.zzzzz['{cdefg}a00001']
+ @nochange
def bench_objectpath(self, root):
"1 2 4"
path = self.objectify.ObjectPath(".zzzzz")
for i in self.repeat3000:
path(root)
+ @nochange
def bench_objectpath_deep(self, root):
"1 2 4"
path = self.objectify.ObjectPath(".zzzzz.{cdefg}a00001")
for i in self.repeat3000:
path(root)
+ @nochange
def bench_objectpath_deep_cached(self, root):
"1 2 4"
cache1 = root.zzzzz
@@ -79,9 +87,11 @@
def bench_annotate(self, root):
self.objectify.annotate(root)
+ @nochange
def bench_descendantpaths(self, root):
root.descendantpaths()
+ @nochange
@with_text(text=True)
def bench_type_inference(self, root):
"1 2 4"
@@ -89,6 +99,7 @@
for i in self.repeat1000:
el.getchildren()
+ @nochange
@with_text(text=True)
def bench_type_inference_annotated(self, root):
"1 2 4"
@@ -97,6 +108,7 @@
for i in self.repeat1000:
el.getchildren()
+ @nochange
@children
def bench_elementmaker(self, children):
E = self.objectify.E
Modified: lxml/trunk/benchmark/bench_xpath.py
==============================================================================
--- lxml/trunk/benchmark/bench_xpath.py (original)
+++ lxml/trunk/benchmark/bench_xpath.py Thu Apr 24 22:04:08 2008
@@ -10,6 +10,7 @@
############################################################
class XPathBenchMark(benchbase.TreeBenchMark):
+ @nochange
@onlylib('lxe')
@children
def bench_xpath_class(self, children):
@@ -17,6 +18,7 @@
for child in children:
xpath(child)
+ @nochange
@onlylib('lxe')
@children
def bench_xpath_class_repeat(self, children):
@@ -24,18 +26,21 @@
xpath = self.etree.XPath("./*[0]")
xpath(child)
+ @nochange
@onlylib('lxe')
def bench_xpath_element(self, root):
xpath = self.etree.XPathElementEvaluator(root)
for child in root:
xpath.evaluate("./*[0]")
+ @nochange
@onlylib('lxe')
@children
def bench_xpath_method(self, children):
for child in children:
child.xpath("./*[0]")
+ @nochange
@onlylib('lxe')
@children
def bench_xpath_old_extensions(self, children):
@@ -50,6 +55,7 @@
for child in children:
xpath(child)
+ @nochange
@onlylib('lxe')
@children
def bench_xpath_extensions(self, children):
Modified: lxml/trunk/benchmark/benchbase.py
==============================================================================
--- lxml/trunk/benchmark/benchbase.py (original)
+++ lxml/trunk/benchmark/benchbase.py Thu Apr 24 22:04:08 2008
@@ -18,6 +18,7 @@
def initArgs(argv):
+ global TREE_FACTOR
try:
argv.remove('-l')
# use large trees
@@ -83,6 +84,11 @@
function.CHILDREN = True
return function
+def nochange(function):
+ "Decorator for benchmarks that do not change the XML tree"
+ function.NO_CHANGE = True
+ return function
+
############################################################
# benchmark baseclass
############################################################
@@ -92,6 +98,9 @@
class TreeBenchMark(object):
atoz = string.ascii_lowercase
+ repeat100 = range(100)
+ repeat500 = range(500)
+ repeat1000 = range(1000)
_LIB_NAME_MAP = {
'etree' : 'lxe',
@@ -286,12 +295,14 @@
serialized = getattr(method, 'STRING', False)
children = getattr(method, 'CHILDREN', False)
+ no_change = getattr(method, 'NO_CHANGE', False)
for tree_tuple in tree_tuples:
for tn in sorted(getattr(method, 'TEXT', (0,))):
for an in sorted(getattr(method, 'ATTRIBUTES', (0,))):
benchmarks.append((name, method_call, tree_tuple,
- tn, an, serialized, children))
+ tn, an, serialized, children,
+ no_change))
return benchmarks
@@ -349,7 +360,8 @@
print " T%d:" % (i+1), ' '.join("%6.4f" % t for t in tree_times)
print
-def runBench(suite, method_name, method_call, tree_set, tn, an, serial, children):
+def runBench(suite, method_name, method_call, tree_set, tn, an,
+ serial, children, no_change):
if method_call is None:
raise SkippedTest
@@ -359,14 +371,19 @@
tree_builders = [ suite.tree_builder(tree, tn, an, serial, children)
for tree in tree_set ]
+ if no_change or serial:
+ args = tuple([ build() for build in tree_builders ])
+ else:
+ args = ()
+
times = []
- args = ()
+ gc.collect()
for i in range(3):
- gc.collect()
gc.disable()
t = -1
for i in call_repeat:
- args = [ build() for build in tree_builders ]
+ if not no_change and not serial:
+ args = [ build() for build in tree_builders ]
t_one_call = current_time()
method_call(*args)
t_one_call = current_time() - t_one_call
@@ -376,14 +393,16 @@
t = min(t, t_one_call)
times.append(1000.0 * t)
gc.enable()
- del args
+ gc.collect()
+ if not isinstance(args, tuple):
+ del args
return times
def runBenchmarks(benchmark_suites, benchmarks):
for bench_calls in izip(*benchmarks):
for lib, (bench, benchmark_setup) in enumerate(izip(benchmark_suites, bench_calls)):
bench_name = benchmark_setup[0]
- tree_set_name = build_treeset_name(*benchmark_setup[-5:])
+ tree_set_name = build_treeset_name(*benchmark_setup[-6:-1])
print "%-3s: %-28s" % (bench.lib_name, bench_name[6:34]),
print "(%-10s)" % tree_set_name,
sys.stdout.flush()
More information about the lxml-checkins mailing list