[Lxml-checkins] r54103 - in lxml/trunk: . benchmark

scoder at codespeak.net scoder at codespeak.net
Thu Apr 24 22:04:10 CEST 2008


Author: scoder
Date: Thu Apr 24 22:04:08 2008
New Revision: 54103

Modified:
   lxml/trunk/   (props changed)
   lxml/trunk/benchmark/bench_etree.py
   lxml/trunk/benchmark/bench_objectify.py
   lxml/trunk/benchmark/bench_xpath.py
   lxml/trunk/benchmark/benchbase.py
Log:
 r4054 at delle:  sbehnel | 2008-04-24 07:57:21 +0200
 faster benchmark runs: avoid rebuilding trees when the benchmark does not change them


Modified: lxml/trunk/benchmark/bench_etree.py
==============================================================================
--- lxml/trunk/benchmark/bench_etree.py	(original)
+++ lxml/trunk/benchmark/bench_etree.py	Thu Apr 24 22:04:08 2008
@@ -3,7 +3,8 @@
 from StringIO import StringIO
 
 import benchbase
-from benchbase import with_attributes, with_text, onlylib, serialized, children
+from benchbase import (with_attributes, with_text, onlylib,
+                       serialized, children, nochange)
 
 TEXT  = "some ASCII text"
 UTEXT = u"some klingon: \F8D2"
@@ -13,39 +14,47 @@
 ############################################################
 
 class BenchMark(benchbase.TreeBenchMark):
+    @nochange
     def bench_iter_children(self, root):
         for child in root:
             pass
 
+    @nochange
     def bench_iter_children_reversed(self, root):
         for child in reversed(root):
             pass
 
+    @nochange
     def bench_first_child(self, root):
-        for i in range(1000):
+        for i in self.repeat1000:
             child = root[0]
 
+    @nochange
     def bench_last_child(self, root):
-        for i in range(1000):
+        for i in self.repeat1000:
             child = root[-1]
 
+    @nochange
     def bench_middle_child(self, root):
         pos = len(root) / 2
-        for i in range(1000):
+        for i in self.repeat1000:
             child = root[pos]
 
+    @nochange
     @with_attributes(False)
     @with_text(text=True)
     @onlylib('lxe', 'ET')
     def bench_tostring_text_ascii(self, root):
         self.etree.tostring(root, method="text")
 
+    @nochange
     @with_attributes(False)
     @with_text(text=True, utext=True)
     @onlylib('lxe')
     def bench_tostring_text_utf16(self, root):
         self.etree.tostring(root, method="text", encoding='UTF-16')
 
+    @nochange
     @with_attributes(False)
     @with_text(text=True, utext=True)
     @onlylib('lxe', 'ET')
@@ -55,6 +64,7 @@
             self.etree.tostring(child, method="text",
                                 encoding='UTF-8', with_tail=True)
 
+    @nochange
     @with_attributes(False)
     @with_text(text=True, utext=True)
     @onlylib('lxe')
@@ -63,22 +73,26 @@
         for child in children:
             self.etree.tostring(child, method="text", encoding=unicode)
 
+    @nochange
     @with_attributes(True, False)
     @with_text(text=True, utext=True)
     def bench_tostring_utf8(self, root):
         self.etree.tostring(root, encoding='UTF-8')
 
+    @nochange
     @with_attributes(True, False)
     @with_text(text=True, utext=True)
     def bench_tostring_utf16(self, root):
         self.etree.tostring(root, encoding='UTF-16')
 
+    @nochange
     @with_attributes(True, False)
     @with_text(text=True, utext=True)
     def bench_tostring_utf8_unicode_XML(self, root):
         xml = unicode(self.etree.tostring(root, encoding='UTF-8'), 'UTF-8')
         self.etree.XML(xml)
 
+    @nochange
     @with_attributes(True, False)
     @with_text(text=True, utext=True)
     def bench_write_utf8_parse_stringIO(self, root):
@@ -149,12 +163,14 @@
     def bench_clear(self, root):
         root.clear()
 
+    @nochange
     @children
     def bench_has_children(self, children):
         for child in children:
             if child and child and child and child and child:
                 pass
 
+    @nochange
     @children
     def bench_len(self, children):
         for child in children:
@@ -172,12 +188,14 @@
             el = Element('{test}test')
             child.append(el)
 
+    @nochange
     @children
     def bench_makeelement(self, children):
         empty_attrib = {}
         for child in children:
             child.makeelement('{test}test', empty_attrib)
 
+    @nochange
     @children
     def bench_create_elements(self, children):
         Element = self.etree.Element
@@ -224,28 +242,34 @@
         for child in children:
             child.get('a')
 
+    @nochange
     def bench_root_getchildren(self, root):
         root.getchildren()
 
+    @nochange
     def bench_root_list_children(self, root):
         list(root)
 
+    @nochange
     @children
     def bench_getchildren(self, children):
         for child in children:
             child.getchildren()
 
+    @nochange
     @children
     def bench_get_children_slice(self, children):
         for child in children:
             child[:]
 
+    @nochange
     @children
     def bench_get_children_slice_2x(self, children):
         for child in children:
             child[:]
             child[:]
 
+    @nochange
     @children
     @with_attributes(True, False)
     @with_text(utext=True, text=True, no_text=True)
@@ -253,34 +277,38 @@
         for child in children:
             copy.deepcopy(child)
 
+    @nochange
     @with_attributes(True, False)
     @with_text(utext=True, text=True, no_text=True)
     def bench_deepcopy_all(self, root):
         copy.deepcopy(root)
 
+    @nochange
     @children
     def bench_tag(self, children):
         for child in children:
             child.tag
 
+    @nochange
     @children
     def bench_tag_repeat(self, children):
         for child in children:
-            for i in repeat(0, 100):
+            for i in self.repeat100:
                 child.tag
 
+    @nochange
     @with_text(utext=True, text=True, no_text=True)
     @children
     def bench_text(self, children):
         for child in children:
             child.text
 
+    @nochange
     @with_text(utext=True, text=True, no_text=True)
     @children
     def bench_text_repeat(self, children):
-        repeat = range(500)
         for child in children:
-            for i in repeat:
+            for i in self.repeat500:
                 child.text
 
     @children
@@ -295,65 +323,82 @@
         for child in children:
             child.text = text
 
+    @nochange
     @onlylib('lxe')
     def bench_index(self, root):
         for child in root:
             root.index(child)
 
+    @nochange
     @onlylib('lxe')
     def bench_index_slice(self, root):
         for child in root[5:100]:
             root.index(child, 5, 100)
 
+    @nochange
     @onlylib('lxe')
     def bench_index_slice_neg(self, root):
         for child in root[-100:-5]:
             root.index(child, start=-100, stop=-5)
 
+    @nochange
     def bench_getiterator_all(self, root):
         list(root.getiterator())
 
+    @nochange
     def bench_getiterator_islice(self, root):
         list(islice(root.getiterator(), 10, 110))
 
+    @nochange
     def bench_getiterator_tag(self, root):
         list(islice(root.getiterator(self.SEARCH_TAG), 3, 10))
 
+    @nochange
     def bench_getiterator_tag_all(self, root):
         list(root.getiterator(self.SEARCH_TAG))
 
+    @nochange
     def bench_getiterator_tag_none(self, root):
         list(root.getiterator("{ThisShould}NeverExist"))
 
+    @nochange
     def bench_getiterator_tag_text(self, root):
         [ e.text for e in root.getiterator(self.SEARCH_TAG) ]
 
+    @nochange
     def bench_findall(self, root):
         root.findall(".//*")
 
+    @nochange
     def bench_findall_child(self, root):
         root.findall(".//*/" + self.SEARCH_TAG)
 
+    @nochange
     def bench_findall_tag(self, root):
         root.findall(".//" + self.SEARCH_TAG)
 
+    @nochange
     def bench_findall_path(self, root):
         root.findall(".//*[%s]/./%s/./*" % (self.SEARCH_TAG, self.SEARCH_TAG))
 
+    @nochange
     @onlylib('lxe')
     def bench_xpath_path(self, root):
         ns, tag = self.SEARCH_TAG[1:].split('}')
         root.xpath(".//*[p:%s]/./p:%s/./*" % (tag,tag),
                    namespaces = {'p':ns})
 
+    @nochange
     @onlylib('lxe')
     def bench_iterfind(self, root):
         list(root.iterfind(".//*"))
 
+    @nochange
     @onlylib('lxe')
     def bench_iterfind_tag(self, root):
         list(root.iterfind(".//" + self.SEARCH_TAG))
 
+    @nochange
     @onlylib('lxe')
     def bench_iterfind_islice(self, root):
         list(islice(root.iterfind(".//*"), 10, 110))

Modified: lxml/trunk/benchmark/bench_objectify.py
==============================================================================
--- lxml/trunk/benchmark/bench_objectify.py	(original)
+++ lxml/trunk/benchmark/bench_objectify.py	Thu Apr 24 22:04:08 2008
@@ -3,7 +3,8 @@
 from StringIO import StringIO
 
 import benchbase
-from benchbase import with_attributes, with_text, onlylib, serialized, children
+from benchbase import (with_attributes, with_text, onlylib,
+                       serialized, children, nochange)
 
 ############################################################
 # Benchmarks
@@ -22,6 +23,7 @@
         parser.setElementClassLookup(lookup)
         super(BenchMark, self).__init__(etree, parser)
 
+    @nochange
     def bench_attribute(self, root):
         "1 2 4"
         for i in self.repeat3000:
@@ -37,17 +39,20 @@
         for i in self.repeat3000:
             root.XYZ = "5"
 
+    @nochange
     def bench_attribute_cached(self, root):
         "1 2 4"
         cache = root.zzzzz
         for i in self.repeat3000:
             root.zzzzz
 
+    @nochange
     def bench_attributes_deep(self, root):
         "1 2 4"
         for i in self.repeat3000:
             root.zzzzz['{cdefg}a00001']
 
+    @nochange
     def bench_attributes_deep_cached(self, root):
         "1 2 4"
         cache1 = root.zzzzz
@@ -55,18 +60,21 @@
         for i in self.repeat3000:
             root.zzzzz['{cdefg}a00001']
 
+    @nochange
     def bench_objectpath(self, root):
         "1 2 4"
         path = self.objectify.ObjectPath(".zzzzz")
         for i in self.repeat3000:
             path(root)
 
+    @nochange
     def bench_objectpath_deep(self, root):
         "1 2 4"
         path = self.objectify.ObjectPath(".zzzzz.{cdefg}a00001")
         for i in self.repeat3000:
             path(root)
 
+    @nochange
     def bench_objectpath_deep_cached(self, root):
         "1 2 4"
         cache1 = root.zzzzz
@@ -79,9 +87,11 @@
     def bench_annotate(self, root):
         self.objectify.annotate(root)
 
+    @nochange
     def bench_descendantpaths(self, root):
         root.descendantpaths()
 
+    @nochange
     @with_text(text=True)
     def bench_type_inference(self, root):
         "1 2 4"
@@ -89,6 +99,7 @@
         for i in self.repeat1000:
             el.getchildren()
 
+    @nochange
     @with_text(text=True)
     def bench_type_inference_annotated(self, root):
         "1 2 4"
@@ -97,6 +108,7 @@
         for i in self.repeat1000:
             el.getchildren()
 
+    @nochange
     @children
     def bench_elementmaker(self, children):
         E = self.objectify.E

Modified: lxml/trunk/benchmark/bench_xpath.py
==============================================================================
--- lxml/trunk/benchmark/bench_xpath.py	(original)
+++ lxml/trunk/benchmark/bench_xpath.py	Thu Apr 24 22:04:08 2008
@@ -10,6 +10,7 @@
 ############################################################
 
 class XPathBenchMark(benchbase.TreeBenchMark):
+    @nochange
     @onlylib('lxe')
     @children
     def bench_xpath_class(self, children):
@@ -17,6 +18,7 @@
         for child in children:
             xpath(child)
 
+    @nochange
     @onlylib('lxe')
     @children
     def bench_xpath_class_repeat(self, children):
@@ -24,18 +26,21 @@
             xpath = self.etree.XPath("./*[0]")
             xpath(child)
 
+    @nochange
     @onlylib('lxe')
     def bench_xpath_element(self, root):
         xpath = self.etree.XPathElementEvaluator(root)
         for child in root:
             xpath.evaluate("./*[0]")
 
+    @nochange
     @onlylib('lxe')
     @children
     def bench_xpath_method(self, children):
         for child in children:
             child.xpath("./*[0]")
 
+    @nochange
     @onlylib('lxe')
     @children
     def bench_xpath_old_extensions(self, children):
@@ -50,6 +55,7 @@
         for child in children:
             xpath(child)
 
+    @nochange
     @onlylib('lxe')
     @children
     def bench_xpath_extensions(self, children):

Modified: lxml/trunk/benchmark/benchbase.py
==============================================================================
--- lxml/trunk/benchmark/benchbase.py	(original)
+++ lxml/trunk/benchmark/benchbase.py	Thu Apr 24 22:04:08 2008
@@ -18,6 +18,7 @@
 
 
 def initArgs(argv):
+    global TREE_FACTOR
     try:
         argv.remove('-l')
         # use large trees
@@ -83,6 +84,11 @@
     function.CHILDREN = True
     return function
 
+def nochange(function):
+    "Decorator for benchmarks that do not change the XML tree"
+    function.NO_CHANGE = True
+    return function
+
 ############################################################
 # benchmark baseclass
 ############################################################
@@ -92,6 +98,9 @@
 
 class TreeBenchMark(object):
     atoz = string.ascii_lowercase
+    repeat100  = range(100)
+    repeat500  = range(500)
+    repeat1000 = range(1000)
 
     _LIB_NAME_MAP = {
         'etree'        : 'lxe',
@@ -286,12 +295,14 @@
 
             serialized = getattr(method, 'STRING',   False)
             children   = getattr(method, 'CHILDREN', False)
+            no_change  = getattr(method, 'NO_CHANGE', False)
 
             for tree_tuple in tree_tuples:
                 for tn in sorted(getattr(method, 'TEXT', (0,))):
                     for an in sorted(getattr(method, 'ATTRIBUTES', (0,))):
                         benchmarks.append((name, method_call, tree_tuple,
-                                           tn, an, serialized, children))
+                                           tn, an, serialized, children,
+                                           no_change))
 
         return benchmarks
 
@@ -349,7 +360,8 @@
             print "     T%d:" % (i+1), ' '.join("%6.4f" % t for t in tree_times)
     print
 
-def runBench(suite, method_name, method_call, tree_set, tn, an, serial, children):
+def runBench(suite, method_name, method_call, tree_set, tn, an,
+             serial, children, no_change):
     if method_call is None:
         raise SkippedTest
 
@@ -359,14 +371,19 @@
     tree_builders = [ suite.tree_builder(tree, tn, an, serial, children)
                       for tree in tree_set ]
 
+    if no_change or serial:
+        args = tuple([ build() for build in tree_builders ])
+    else:
+        args = ()
+
     times = []
-    args = ()
+    gc.collect()
     for i in range(3):
-        gc.collect()
         gc.disable()
         t = -1
         for i in call_repeat:
-            args = [ build() for build in tree_builders ]
+            if not no_change and not serial:
+                args = [ build() for build in tree_builders ]
             t_one_call = current_time()
             method_call(*args)
             t_one_call = current_time() - t_one_call
@@ -376,14 +393,16 @@
                 t = min(t, t_one_call)
         times.append(1000.0 * t)
         gc.enable()
-        del args
+        gc.collect()
+        if not isinstance(args, tuple):
+            del args
     return times
 
 def runBenchmarks(benchmark_suites, benchmarks):
     for bench_calls in izip(*benchmarks):
         for lib, (bench, benchmark_setup) in enumerate(izip(benchmark_suites, bench_calls)):
             bench_name = benchmark_setup[0]
-            tree_set_name = build_treeset_name(*benchmark_setup[-5:])
+            tree_set_name = build_treeset_name(*benchmark_setup[-6:-1])
             print "%-3s: %-28s" % (bench.lib_name, bench_name[6:34]),
             print "(%-10s)" % tree_set_name,
             sys.stdout.flush()


More information about the lxml-checkins mailing list