From scoder at codespeak.net Sun Aug 8 16:19:42 2010 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sun, 8 Aug 2010 16:19:42 +0200 (CEST) Subject: [Lxml-checkins] r76525 - lxml/trunk/src/lxml Message-ID: <20100808141942.7B326282B90@codespeak.net> Author: scoder Date: Sun Aug 8 16:19:39 2010 New Revision: 76525 Modified: lxml/trunk/src/lxml/lxml.etree.pyx Log: fixes for ns prefix name generation Modified: lxml/trunk/src/lxml/lxml.etree.pyx ============================================================================== --- lxml/trunk/src/lxml/lxml.etree.pyx (original) +++ lxml/trunk/src/lxml/lxml.etree.pyx Sun Aug 8 16:19:39 2010 @@ -306,7 +306,7 @@ document is cleaned up. """ cdef int _ns_counter - cdef object _prefix_tail + cdef bytes _prefix_tail cdef xmlDoc* _c_doc cdef _BaseParser _parser @@ -374,11 +374,11 @@ else: return (self._c_doc.standalone == 1) - cdef buildNewPrefix(self): + cdef bytes buildNewPrefix(self): # get a new unique prefix ("nsX") for this document + cdef bytes ns if self._ns_counter < len(_PREFIX_CACHE): ns = _PREFIX_CACHE[self._ns_counter] - python.Py_INCREF(ns) else: ns = python.PyBytes_FromFormat("ns%d", self._ns_counter) if self._prefix_tail is not None: @@ -388,9 +388,9 @@ # overflow! 
self._ns_counter = 0 if self._prefix_tail is None: - self._prefix_tail = "A" + self._prefix_tail = b"A" else: - self._prefix_tail += "A" + self._prefix_tail += b"A" return ns cdef xmlNs* _findOrBuildNodeNs(self, xmlNode* c_node, From scoder at codespeak.net Sun Aug 8 17:19:04 2010 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sun, 8 Aug 2010 17:19:04 +0200 (CEST) Subject: [Lxml-checkins] r76527 - lxml/trunk Message-ID: <20100808151904.76444282B90@codespeak.net> Author: scoder Date: Sun Aug 8 17:19:03 2010 New Revision: 76527 Added: lxml/trunk/bisect_crashes.py Log: simple script to find crashing subsets of tests in the test suite Added: lxml/trunk/bisect_crashes.py ============================================================================== --- (empty file) +++ lxml/trunk/bisect_crashes.py Sun Aug 8 17:19:03 2010 @@ -0,0 +1,60 @@ + +import os +import sys +import unittest + +# make sure we import test.py from the right place +script_path = os.path.abspath(os.path.dirname(sys.argv[0])) +sys.path.insert(0, script_path) + +test_base_path = os.path.join(script_path, 'src') +sys.path.insert(1, test_base_path) + +import test + +cfg = test.Options() +cfg.verbosity = 1 +cfg.basedir = test_base_path +cfg.unit_tests = True + +def find_tests(): + test_files = test.get_test_files(cfg) + return test.get_test_cases(test_files, cfg) + +def run_tests(test_cases): + print('Running subset of %d tests' % len(test_cases)) + pid = os.fork() + if not pid: + # child executes tests + runner = test.CustomTestRunner(cfg, None) + suite = unittest.TestSuite() + suite.addTests(test_cases) + os._exit( not runner.run(suite).wasSuccessful() ) + cid, retval = os.waitpid(pid, 0) + retval >>= 8 + return retval == 0 + +def bisect_tests(): + tests = find_tests() + print('Found %d tests' % len(tests)) + shift = len(tests) // 4 + while len(tests) > 1 and shift > 0: + mid = len(tests) // 2 + 1 + left, right = tests[:mid], tests[mid:] + + if not run_tests(left): + tests = left + shift = 
len(tests) // 4 + 1 + break + if not run_tests(right): + tests = right + shift = len(tests) // 4 + 1 + break + + shift //= 2 + tests = tests[shift:] + tests[:shift] + # looks like we can't make the set of tests any smaller + return tests + +if __name__ == '__main__': + print('\n'.join([test.id() for test in bisect_tests()])) From scoder at codespeak.net Sun Aug 8 17:52:03 2010 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sun, 8 Aug 2010 17:52:03 +0200 (CEST) Subject: [Lxml-checkins] r76528 - lxml/trunk Message-ID: <20100808155203.027CA282B90@codespeak.net> Author: scoder Date: Sun Aug 8 17:52:02 2010 New Revision: 76528 Modified: lxml/trunk/bisect_crashes.py Log: better test output Modified: lxml/trunk/bisect_crashes.py ============================================================================== --- lxml/trunk/bisect_crashes.py (original) +++ lxml/trunk/bisect_crashes.py Sun Aug 8 17:52:02 2010 @@ -13,16 +13,25 @@ import test cfg = test.Options() -cfg.verbosity = 1 +cfg.verbosity = 0 cfg.basedir = test_base_path cfg.unit_tests = True +def write(line, *args): + if args: + line = line % args + sys.stderr.write(line + '\n') + + def find_tests(): test_files = test.get_test_files(cfg) return test.get_test_cases(test_files, cfg) def run_tests(test_cases): - print('Running subset of %d tests' % len(test_cases)) + if not test_cases: + return True + write('Running subset of %d tests [%s .. 
%s]', + len(test_cases), test_cases[0].id(), test_cases[-1].id()) pid = os.fork() if not pid: # child executes tests @@ -36,25 +45,29 @@ def bisect_tests(): tests = find_tests() - print('Found %d tests' % len(tests)) + write('Found %d tests', len(tests)) shift = len(tests) // 4 + last_failed = False while len(tests) > 1 and shift > 0: mid = len(tests) // 2 + 1 left, right = tests[:mid], tests[mid:] if not run_tests(left): + last_failed = True tests = left shift = len(tests) // 4 + 1 break if not run_tests(right): + last_failed = True tests = right shift = len(tests) // 4 + 1 break + last_failed = False shift //= 2 tests = tests[shift:] + tests[:shift] # looks like we can't make the set of tests any smaller - return tests + return last_failed and tests or [] if __name__ == '__main__': - print('\n'.join([test.id() for test in bisect_tests()])) + write('Failing tests:\n%s', '\n'.join([test.id() for test in bisect_tests()])) From scoder at codespeak.net Sun Aug 8 17:59:02 2010 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sun, 8 Aug 2010 17:59:02 +0200 (CEST) Subject: [Lxml-checkins] r76529 - lxml/trunk Message-ID: <20100808155902.0FDEE282B90@codespeak.net> Author: scoder Date: Sun Aug 8 17:59:01 2010 New Revision: 76529 Modified: lxml/trunk/bisect_crashes.py Log: fix error detection in test bisecting Modified: lxml/trunk/bisect_crashes.py ============================================================================== --- lxml/trunk/bisect_crashes.py (original) +++ lxml/trunk/bisect_crashes.py Sun Aug 8 17:59:01 2010 @@ -40,8 +40,9 @@ suite.addTests(test_cases) os._exit( not runner.run(suite).wasSuccessful() ) cid, retval = os.waitpid(pid, 0) - retval >>= 8 - return retval == 0 + if retval: + write('exit status: %d, signal: %d', retval >> 8, retval % 0xFF) + return (retval % 0xFF) == 0 # signal def bisect_tests(): tests = find_tests() From scoder at codespeak.net Sun Aug 8 18:02:59 2010 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sun, 8 
Aug 2010 18:02:59 +0200 (CEST) Subject: [Lxml-checkins] r76530 - lxml/trunk Message-ID: <20100808160259.B904B282B90@codespeak.net> Author: scoder Date: Sun Aug 8 18:02:58 2010 New Revision: 76530 Modified: lxml/trunk/bisect_crashes.py Log: fix error handling in test bisecting Modified: lxml/trunk/bisect_crashes.py ============================================================================== --- lxml/trunk/bisect_crashes.py (original) +++ lxml/trunk/bisect_crashes.py Sun Aug 8 18:02:58 2010 @@ -57,16 +57,15 @@ last_failed = True tests = left shift = len(tests) // 4 + 1 - break - if not run_tests(right): + elif not run_tests(right): last_failed = True tests = right shift = len(tests) // 4 + 1 - break - - last_failed = False - shift //= 2 - tests = tests[shift:] + tests[:shift] + else: + # retry + last_failed = False + shift //= 2 + tests = tests[shift:] + tests[:shift] # looks like we can't make the set of tests any smaller return last_failed and tests or [] From scoder at codespeak.net Sun Aug 8 18:15:02 2010 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sun, 8 Aug 2010 18:15:02 +0200 (CEST) Subject: [Lxml-checkins] r76531 - lxml/trunk Message-ID: <20100808161502.E592136C22F@codespeak.net> Author: scoder Date: Sun Aug 8 18:15:01 2010 New Revision: 76531 Modified: lxml/trunk/bisect_crashes.py Log: better test output Modified: lxml/trunk/bisect_crashes.py ============================================================================== --- lxml/trunk/bisect_crashes.py (original) +++ lxml/trunk/bisect_crashes.py Sun Aug 8 18:15:01 2010 @@ -48,26 +48,25 @@ tests = find_tests() write('Found %d tests', len(tests)) shift = len(tests) // 4 - last_failed = False + failed = [] while len(tests) > 1 and shift > 0: mid = len(tests) // 2 + 1 left, right = tests[:mid], tests[mid:] if not run_tests(left): - last_failed = True + failed = left[:] tests = left shift = len(tests) // 4 + 1 elif not run_tests(right): - last_failed = True + failed = right[:] tests = right 
shift = len(tests) // 4 + 1 else: # retry - last_failed = False shift //= 2 tests = tests[shift:] + tests[:shift] # looks like we can't make the set of tests any smaller - return last_failed and tests or [] + return failed if __name__ == '__main__': write('Failing tests:\n%s', '\n'.join([test.id() for test in bisect_tests()])) From scoder at codespeak.net Sun Aug 8 18:48:36 2010 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sun, 8 Aug 2010 18:48:36 +0200 (CEST) Subject: [Lxml-checkins] r76532 - lxml/trunk Message-ID: <20100808164836.473F3282B90@codespeak.net> Author: scoder Date: Sun Aug 8 18:48:34 2010 New Revision: 76532 Added: lxml/trunk/DD.py Modified: lxml/trunk/bisect_crashes.py Log: use 'real' DD tool for bisecting Added: lxml/trunk/DD.py ============================================================================== --- (empty file) +++ lxml/trunk/DD.py Sun Aug 8 18:48:34 2010 @@ -0,0 +1,943 @@ +#! /usr/bin/env python +# $Id: DD.py,v 1.2 2001/11/05 19:53:33 zeller Exp $ +# Enhanced Delta Debugging class +# Copyright (c) 1999, 2000, 2001 Andreas Zeller. + +# This module (written in Python) implements the base delta debugging +# algorithms and is at the core of all our experiments. This should +# easily run on any platform and any Python version since 1.6. +# +# To plug this into your system, all you have to do is to create a +# subclass with a dedicated `test()' method. Basically, you would +# invoke the DD test case minimization algorithm (= the `ddmin()' +# method) with a list of characters; the `test()' method would combine +# them to a document and run the test. This should be easy to realize +# and give you some good starting results; the file includes a simple +# sample application. +# +# This file is in the public domain; feel free to copy, modify, use +# and distribute this software as you wish - with one exception. +# Passau University has filed a patent for the use of delta debugging +# on program states (A. 
Zeller: `Isolating cause-effect chains', +# Saarland University, 2001). The fact that this file is publicly +# available does not imply that I or anyone else grants you any rights +# related to this patent. +# +# The use of Delta Debugging to isolate failure-inducing code changes +# (A. Zeller: `Yesterday, my program worked', ESEC/FSE 1999) or to +# simplify failure-inducing input (R. Hildebrandt, A. Zeller: +# `Simplifying failure-inducing input', ISSTA 2000) is, as far as I +# know, not covered by any patent, nor will it ever be. If you use +# this software in any way, I'd appreciate if you include a citation +# such as `This software uses the delta debugging algorithm as +# described in (insert one of the papers above)'. +# +# All about Delta Debugging is found at the delta debugging web site, +# +# http://www.st.cs.uni-sb.de/dd/ +# +# Happy debugging, +# +# Andreas Zeller + + +# Start with some helpers. +class OutcomeCache: + # This class holds test outcomes for configurations. This avoids + # running the same test twice. + + # The outcome cache is implemented as a tree. Each node points + # to the outcome of the remaining list. + # + # Example: ([1, 2, 3], PASS), ([1, 2], FAIL), ([1, 4, 5], FAIL): + # + # (2, FAIL)--(3, PASS) + # / + # (1, None) + # \ + # (4, None)--(5, FAIL) + + def __init__(self): + self.tail = {} # Points to outcome of tail + self.result = None # Result so far + + def add(self, c, result): + """Add (C, RESULT) to the cache. 
C must be a list of scalars.""" + cs = c[:] + cs.sort() + + p = self + for start in range(len(c)): + if not p.tail.has_key(c[start]): + p.tail[c[start]] = OutcomeCache() + p = p.tail[c[start]] + + p.result = result + + def lookup(self, c): + """Return RESULT if (C, RESULT) is in the cache; None, otherwise.""" + p = self + for start in range(len(c)): + if not p.tail.has_key(c[start]): + return None + p = p.tail[c[start]] + + return p.result + + def lookup_superset(self, c, start = 0): + """Return RESULT if there is some (C', RESULT) in the cache with + C' being a superset of C or equal to C. Otherwise, return None.""" + + # FIXME: Make this non-recursive! + if start >= len(c): + if self.result: + return self.result + elif self.tail != {}: + # Select some superset + superset = self.tail[self.tail.keys()[0]] + return superset.lookup_superset(c, start + 1) + else: + return None + + if self.tail.has_key(c[start]): + return self.tail[c[start]].lookup_superset(c, start + 1) + + # Let K0 be the largest element in TAIL such that K0 <= C[START] + k0 = None + for k in self.tail.keys(): + if (k0 == None or k > k0) and k <= c[start]: + k0 = k + + if k0 != None: + return self.tail[k0].lookup_superset(c, start) + + return None + + def lookup_subset(self, c): + """Return RESULT if there is some (C', RESULT) in the cache with + C' being a subset of C or equal to C. 
Otherwise, return None.""" + p = self + for start in range(len(c)): + if p.tail.has_key(c[start]): + p = p.tail[c[start]] + + return p.result + + + + +# Test the outcome cache +def oc_test(): + oc = OutcomeCache() + + assert oc.lookup([1, 2, 3]) == None + oc.add([1, 2, 3], 4) + assert oc.lookup([1, 2, 3]) == 4 + assert oc.lookup([1, 2, 3, 4]) == None + + assert oc.lookup([5, 6, 7]) == None + oc.add([5, 6, 7], 8) + assert oc.lookup([5, 6, 7]) == 8 + + assert oc.lookup([]) == None + oc.add([], 0) + assert oc.lookup([]) == 0 + + assert oc.lookup([1, 2]) == None + oc.add([1, 2], 3) + assert oc.lookup([1, 2]) == 3 + assert oc.lookup([1, 2, 3]) == 4 + + assert oc.lookup_superset([1]) == 3 or oc.lookup_superset([1]) == 4 + assert oc.lookup_superset([1, 2]) == 3 or oc.lookup_superset([1, 2]) == 4 + assert oc.lookup_superset([5]) == 8 + assert oc.lookup_superset([5, 6]) == 8 + assert oc.lookup_superset([6, 7]) == 8 + assert oc.lookup_superset([7]) == 8 + assert oc.lookup_superset([]) != None + + assert oc.lookup_superset([9]) == None + assert oc.lookup_superset([7, 9]) == None + assert oc.lookup_superset([-5, 1]) == None + assert oc.lookup_superset([1, 2, 3, 9]) == None + assert oc.lookup_superset([4, 5, 6, 7]) == None + + assert oc.lookup_subset([]) == 0 + assert oc.lookup_subset([1, 2, 3]) == 4 + assert oc.lookup_subset([1, 2, 3, 4]) == 4 + assert oc.lookup_subset([1, 3]) == None + assert oc.lookup_subset([1, 2]) == 3 + + assert oc.lookup_subset([-5, 1]) == None + assert oc.lookup_subset([-5, 1, 2]) == 3 + assert oc.lookup_subset([-5]) == 0 + + +# Main Delta Debugging algorithm. +class DD: + # Delta debugging base class. To use this class for a particular + # setting, create a subclass with an overloaded `test()' method. + # + # Main entry points are: + # - `ddmin()' which computes a minimal failure-inducing configuration, and + # - `dd()' which computes a minimal failure-inducing difference. + # + # See also the usage sample at the end of this file. 
+ # + # For further fine-tuning, you can implement an own `resolve()' + # method (tries to add or remove configuration elements in case of + # inconsistencies), or implement an own `split()' method, which + # allows you to split configurations according to your own + # criteria. + # + # The class includes other previous delta debugging algorithms, + # which are obsolete now; they are only included for comparison + # purposes. + + # Test outcomes. + PASS = "PASS" + FAIL = "FAIL" + UNRESOLVED = "UNRESOLVED" + + # Resolving directions. + ADD = "ADD" # Add deltas to resolve + REMOVE = "REMOVE" # Remove deltas to resolve + + # Debugging output (set to 1 to enable) + debug_test = 0 + debug_dd = 0 + debug_split = 0 + debug_resolve = 0 + + def __init__(self): + self.__resolving = 0 + self.__last_reported_length = 0 + self.monotony = 0 + self.outcome_cache = OutcomeCache() + self.cache_outcomes = 1 + self.minimize = 1 + self.maximize = 1 + self.assume_axioms_hold = 1 + + # Helpers + def __listminus(self, c1, c2): + """Return a list of all elements of C1 that are not in C2.""" + s2 = {} + for delta in c2: + s2[delta] = 1 + + c = [] + for delta in c1: + if not s2.has_key(delta): + c.append(delta) + + return c + + def __listintersect(self, c1, c2): + """Return the common elements of C1 and C2.""" + s2 = {} + for delta in c2: + s2[delta] = 1 + + c = [] + for delta in c1: + if s2.has_key(delta): + c.append(delta) + + return c + + def __listunion(self, c1, c2): + """Return the union of C1 and C2.""" + s1 = {} + for delta in c1: + s1[delta] = 1 + + c = c1[:] + for delta in c2: + if not s1.has_key(delta): + c.append(delta) + + return c + + def __listsubseteq(self, c1, c2): + """Return 1 if C1 is a subset or equal to C2.""" + s2 = {} + for delta in c2: + s2[delta] = 1 + + for delta in c1: + if not s2.has_key(delta): + return 0 + + return 1 + + # Output + def coerce(self, c): + """Return the configuration C as a compact string""" + # Default: use printable representation + return `c` 
+ + def pretty(self, c): + """Like coerce(), but sort beforehand""" + sorted_c = c[:] + sorted_c.sort() + return self.coerce(sorted_c) + + # Testing + def test(self, c): + """Test the configuration C. Return PASS, FAIL, or UNRESOLVED""" + c.sort() + + # If we had this test before, return its result + if self.cache_outcomes: + cached_result = self.outcome_cache.lookup(c) + if cached_result != None: + return cached_result + + if self.monotony: + # Check whether we had a passing superset of this test before + cached_result = self.outcome_cache.lookup_superset(c) + if cached_result == self.PASS: + return self.PASS + + cached_result = self.outcome_cache.lookup_subset(c) + if cached_result == self.FAIL: + return self.FAIL + + if self.debug_test: + print + print "test(" + self.coerce(c) + ")..." + + outcome = self._test(c) + + if self.debug_test: + print "test(" + self.coerce(c) + ") = " + `outcome` + + if self.cache_outcomes: + self.outcome_cache.add(c, outcome) + + return outcome + + def _test(self, c): + """Stub to overload in subclasses""" + return self.UNRESOLVED # Placeholder + + + # Splitting + def split(self, c, n): + """Split C into [C_1, C_2, ..., C_n].""" + if self.debug_split: + print "split(" + self.coerce(c) + ", " + `n` + ")..." + + outcome = self._split(c, n) + + if self.debug_split: + print "split(" + self.coerce(c) + ", " + `n` + ") = " + `outcome` + + return outcome + + def _split(self, c, n): + """Stub to overload in subclasses""" + subsets = [] + start = 0 + for i in range(n): + subset = c[start:start + (len(c) - start) / (n - i)] + subsets.append(subset) + start = start + len(subset) + return subsets + + + # Resolving + def resolve(self, csub, c, direction): + """If direction == ADD, resolve inconsistency by adding deltas + to CSUB. Otherwise, resolve by removing deltas from CSUB.""" + + if self.debug_resolve: + print "resolve(" + `csub` + ", " + self.coerce(c) + ", " + \ + `direction` + ")..." 
+ + outcome = self._resolve(csub, c, direction) + + if self.debug_resolve: + print "resolve(" + `csub` + ", " + self.coerce(c) + ", " + \ + `direction` + ") = " + `outcome` + + return outcome + + + def _resolve(self, csub, c, direction): + """Stub to overload in subclasses.""" + # By default, no way to resolve + return None + + + # Test with fixes + def test_and_resolve(self, csub, r, c, direction): + """Repeat testing CSUB + R while unresolved.""" + + initial_csub = csub[:] + c2 = self.__listunion(r, c) + + csubr = self.__listunion(csub, r) + t = self.test(csubr) + + # necessary to use more resolving mechanisms which can reverse each + # other, can (but needn't) be used in subclasses + self._resolve_type = 0 + + while t == self.UNRESOLVED: + self.__resolving = 1 + csubr = self.resolve(csubr, c, direction) + + if csubr == None: + # Nothing left to resolve + break + + if len(csubr) >= len(c2): + # Added everything: csub == c2. ("Upper" Baseline) + # This has already been tested. + csubr = None + break + + if len(csubr) <= len(r): + # Removed everything: csub == r. (Baseline) + # This has already been tested. 
+ csubr = None + break + + t = self.test(csubr) + + self.__resolving = 0 + if csubr == None: + return self.UNRESOLVED, initial_csub + + # assert t == self.PASS or t == self.FAIL + csub = self.__listminus(csubr, r) + return t, csub + + # Inquiries + def resolving(self): + """Return 1 while resolving.""" + return self.__resolving + + + # Logging + def report_progress(self, c, title): + if len(c) != self.__last_reported_length: + print + print title + ": " + `len(c)` + " deltas left:", self.coerce(c) + self.__last_reported_length = len(c) + + + # Delta Debugging (old ESEC/FSE version) + def old_dd(self, c, r = [], n = 2): + """Return the failure-inducing subset of C""" + + assert self.test([]) == dd.PASS + assert self.test(c) == dd.FAIL + + if self.debug_dd: + print ("dd(" + self.pretty(c) + ", " + `r` + ", " + `n` + ")...") + + outcome = self._old_dd(c, r, n) + + if self.debug_dd: + print ("dd(" + self.pretty(c) + ", " + `r` + ", " + `n` + + ") = " + `outcome`) + + return outcome + + def _old_dd(self, c, r, n): + """Stub to overload in subclasses""" + + if r == []: + assert self.test([]) == self.PASS + assert self.test(c) == self.FAIL + else: + assert self.test(r) != self.FAIL + assert self.test(c + r) != self.PASS + + assert self.__listintersect(c, r) == [] + + if len(c) == 1: + # Nothing to split + return c + + run = 1 + next_c = c[:] + next_r = r[:] + + # We replace the tail recursion from the paper by a loop + while 1: + self.report_progress(c, "dd") + + cs = self.split(c, n) + + print + print "dd (run #" + `run` + "): trying", + for i in range(n): + if i > 0: + print "+", + print len(cs[i]), + print + + # Check subsets + ts = [] + for i in range(n): + if self.debug_dd: + print "dd: trying cs[" + `i` + "] =", self.pretty(cs[i]) + + t, cs[i] = self.test_and_resolve(cs[i], r, c, self.REMOVE) + ts.append(t) + if t == self.FAIL: + # Found + if self.debug_dd: + print "dd: found", len(cs[i]), "deltas:", + print self.pretty(cs[i]) + return self.dd(cs[i], r) + + # Check 
complements + cbars = [] + tbars = [] + + for i in range(n): + cbar = self.__listminus(c, cs[i] + r) + tbar, cbar = self.test_and_resolve(cbar, r, c, self.ADD) + + + doubled = self.__listintersect(cbar, cs[i]) + if doubled != []: + cs[i] = self.__listminus(cs[i], doubled) + + + cbars.append(cbar) + tbars.append(tbar) + + if ts[i] == self.PASS and tbars[i] == self.PASS: + # Interference + if self.debug_dd: + print "dd: interference of", self.pretty(cs[i]), + print "and", self.pretty(cbars[i]) + + d = self.dd(cs[i][:], cbars[i] + r) + dbar = self.dd(cbars[i][:], cs[i] + r) + return d + dbar + + if ts[i] == self.UNRESOLVED and tbars[i] == self.PASS: + # Preference + if self.debug_dd: + print "dd: preferring", len(cs[i]), "deltas:", + print self.pretty(cs[i]) + + return self.dd(cs[i][:], cbars[i] + r) + + if ts[i] == self.PASS or tbars[i] == self.FAIL: + if self.debug_dd: + excluded = self.__listminus(next_c, cbars[i]) + print "dd: excluding", len(excluded), "deltas:", + print self.pretty(excluded) + + if ts[i] == self.PASS: + next_r = self.__listunion(next_r, cs[i]) + next_c = self.__listintersect(next_c, cbars[i]) + self.report_progress(next_c, "dd") + + next_n = min(len(next_c), n * 2) + + if next_n == n and next_c[:] == c[:] and next_r[:] == r[:]: + # Nothing left + if self.debug_dd: + print "dd: nothing left" + return next_c + + # Try again + if self.debug_dd: + print "dd: try again" + + c = next_c + r = next_r + n = next_n + run = run + 1 + + + def test_mix(self, csub, c, direction): + if self.minimize: + (t, csub) = self.test_and_resolve(csub, [], c, direction) + if t == self.FAIL: + return (t, csub) + + if self.maximize: + csubbar = self.__listminus(self.CC, csub) + cbar = self.__listminus(self.CC, c) + if direction == self.ADD: + directionbar = self.REMOVE + else: + directionbar = self.ADD + + (tbar, csubbar) = self.test_and_resolve(csubbar, [], cbar, + directionbar) + + csub = self.__listminus(self.CC, csubbar) + + if tbar == self.PASS: + t = self.FAIL + elif 
tbar == self.FAIL: + t = self.PASS + else: + t = self.UNRESOLVED + + return (t, csub) + + + # Delta Debugging (new ISSTA version) + def ddgen(self, c, minimize, maximize): + """Return a 1-minimal failing subset of C""" + + self.minimize = minimize + self.maximize = maximize + + n = 2 + self.CC = c + + if self.debug_dd: + print ("dd(" + self.pretty(c) + ", " + `n` + ")...") + + outcome = self._dd(c, n) + + if self.debug_dd: + print ("dd(" + self.pretty(c) + ", " + `n` + ") = " + `outcome`) + + return outcome + + def _dd(self, c, n): + """Stub to overload in subclasses""" + + assert self.test([]) == self.PASS + + run = 1 + cbar_offset = 0 + + # We replace the tail recursion from the paper by a loop + while 1: + tc = self.test(c) + assert tc == self.FAIL or tc == self.UNRESOLVED + + if n > len(c): + # No further minimizing + print "dd: done" + return c + + self.report_progress(c, "dd") + + cs = self.split(c, n) + + print + print "dd (run #" + `run` + "): trying", + for i in range(n): + if i > 0: + print "+", + print len(cs[i]), + print + + c_failed = 0 + cbar_failed = 0 + + next_c = c[:] + next_n = n + + # Check subsets + for i in range(n): + if self.debug_dd: + print "dd: trying", self.pretty(cs[i]) + + (t, cs[i]) = self.test_mix(cs[i], c, self.REMOVE) + + if t == self.FAIL: + # Found + if self.debug_dd: + print "dd: found", len(cs[i]), "deltas:", + print self.pretty(cs[i]) + + c_failed = 1 + next_c = cs[i] + next_n = 2 + cbar_offset = 0 + self.report_progress(next_c, "dd") + break + + if not c_failed: + # Check complements + cbars = n * [self.UNRESOLVED] + + # print "cbar_offset =", cbar_offset + + for j in range(n): + i = (j + cbar_offset) % n + cbars[i] = self.__listminus(c, cs[i]) + t, cbars[i] = self.test_mix(cbars[i], c, self.ADD) + + doubled = self.__listintersect(cbars[i], cs[i]) + if doubled != []: + cs[i] = self.__listminus(cs[i], doubled) + + if t == self.FAIL: + if self.debug_dd: + print "dd: reduced to", len(cbars[i]), + print "deltas:", + print 
self.pretty(cbars[i]) + + cbar_failed = 1 + next_c = self.__listintersect(next_c, cbars[i]) + next_n = next_n - 1 + self.report_progress(next_c, "dd") + + # In next run, start removing the following subset + cbar_offset = i + break + + if not c_failed and not cbar_failed: + if n >= len(c): + # No further minimizing + print "dd: done" + return c + + next_n = min(len(c), n * 2) + print "dd: increase granularity to", next_n + cbar_offset = (cbar_offset * next_n) / n + + c = next_c + n = next_n + run = run + 1 + + def ddmin(self, c): + return self.ddgen(c, 1, 0) + + def ddmax(self, c): + return self.ddgen(c, 0, 1) + + def ddmix(self, c): + return self.ddgen(c, 1, 1) + + + # General delta debugging (new TSE version) + def dddiff(self, c): + n = 2 + + if self.debug_dd: + print ("dddiff(" + self.pretty(c) + ", " + `n` + ")...") + + outcome = self._dddiff([], c, n) + + if self.debug_dd: + print ("dddiff(" + self.pretty(c) + ", " + `n` + ") = " + + `outcome`) + + return outcome + + def _dddiff(self, c1, c2, n): + run = 1 + cbar_offset = 0 + + # We replace the tail recursion from the paper by a loop + while 1: + if self.debug_dd: + print "dd: c1 =", self.pretty(c1) + print "dd: c2 =", self.pretty(c2) + + if self.assume_axioms_hold: + t1 = self.PASS + t2 = self.FAIL + else: + t1 = self.test(c1) + t2 = self.test(c2) + + assert t1 == self.PASS + assert t2 == self.FAIL + assert self.__listsubseteq(c1, c2) + + c = self.__listminus(c2, c1) + + if self.debug_dd: + print "dd: c2 - c1 =", self.pretty(c) + + if n > len(c): + # No further minimizing + print "dd: done" + return (c, c1, c2) + + self.report_progress(c, "dd") + + cs = self.split(c, n) + + print + print "dd (run #" + `run` + "): trying", + for i in range(n): + if i > 0: + print "+", + print len(cs[i]), + print + + progress = 0 + + next_c1 = c1[:] + next_c2 = c2[:] + next_n = n + + # Check subsets + for j in range(n): + i = (j + cbar_offset) % n + + if self.debug_dd: + print "dd: trying", self.pretty(cs[i]) + + (t, csub) = 
self.test_and_resolve(cs[i], c1, c, self.REMOVE) + csub = self.__listunion(c1, csub) + + if t == self.FAIL and t1 == self.PASS: + # Found + progress = 1 + next_c2 = csub + next_n = 2 + cbar_offset = 0 + + if self.debug_dd: + print "dd: reduce c2 to", len(next_c2), "deltas:", + print self.pretty(next_c2) + break + + if t == self.PASS and t2 == self.FAIL: + # Reduce to complement + progress = 1 + next_c1 = csub + next_n = max(next_n - 1, 2) + cbar_offset = i + + if self.debug_dd: + print "dd: increase c1 to", len(next_c1), "deltas:", + print self.pretty(next_c1) + break + + + csub = self.__listminus(c, cs[i]) + (t, csub) = self.test_and_resolve(csub, c1, c, self.ADD) + csub = self.__listunion(c1, csub) + + if t == self.PASS and t2 == self.FAIL: + # Found + progress = 1 + next_c1 = csub + next_n = 2 + cbar_offset = 0 + + if self.debug_dd: + print "dd: increase c1 to", len(next_c1), "deltas:", + print self.pretty(next_c1) + break + + if t == self.FAIL and t1 == self.PASS: + # Increase + progress = 1 + next_c2 = csub + next_n = max(next_n - 1, 2) + cbar_offset = i + + if self.debug_dd: + print "dd: reduce c2 to", len(next_c2), "deltas:", + print self.pretty(next_c2) + break + + if progress: + self.report_progress(self.__listminus(next_c2, next_c1), "dd") + else: + if n >= len(c): + # No further minimizing + print "dd: done" + return (c, c1, c2) + + next_n = min(len(c), n * 2) + print "dd: increase granularity to", next_n + cbar_offset = (cbar_offset * next_n) / n + + c1 = next_c1 + c2 = next_c2 + n = next_n + run = run + 1 + + def dd(self, c): + return self.dddiff(c) # Backwards compatibility + + + + + +if __name__ == '__main__': + # Test the outcome cache + oc_test() + + # Define our own DD class, with its own test method + class MyDD(DD): + def _test_a(self, c): + "Test the configuration C. Return PASS, FAIL, or UNRESOLVED." 
+ + # Just a sample + # if 2 in c and not 3 in c: + # return self.UNRESOLVED + # if 3 in c and not 7 in c: + # return self.UNRESOLVED + if 7 in c and not 2 in c: + return self.UNRESOLVED + if 5 in c and 8 in c: + return self.FAIL + return self.PASS + + def _test_b(self, c): + if c == []: + return self.PASS + if 1 in c and 2 in c and 3 in c and 4 in c and \ + 5 in c and 6 in c and 7 in c and 8 in c: + return self.FAIL + return self.UNRESOLVED + + def _test_c(self, c): + if 1 in c and 2 in c and 3 in c and 4 in c and \ + 6 in c and 8 in c: + if 5 in c and 7 in c: + return self.UNRESOLVED + else: + return self.FAIL + if 1 in c or 2 in c or 3 in c or 4 in c or \ + 6 in c or 8 in c: + return self.UNRESOLVED + return self.PASS + + def __init__(self): + self._test = self._test_c + DD.__init__(self) + + + print "WYNOT - a tool for delta debugging." + mydd = MyDD() + # mydd.debug_test = 1 # Enable debugging output + # mydd.debug_dd = 1 # Enable debugging output + # mydd.debug_split = 1 # Enable debugging output + # mydd.debug_resolve = 1 # Enable debugging output + + # mydd.cache_outcomes = 0 + # mydd.monotony = 0 + + print "Minimizing failure-inducing input..." + c = mydd.ddmin([1, 2, 3, 4, 5, 6, 7, 8]) # Invoke DDMIN + print "The 1-minimal failure-inducing input is", c + print "Removing any element will make the failure go away." + print + + print "Computing the failure-inducing difference..." 
+ (c, c1, c2) = mydd.dd([1, 2, 3, 4, 5, 6, 7, 8]) # Invoke DD + print "The 1-minimal failure-inducing difference is", c + print c1, "passes,", c2, "fails" + + + +# Local Variables: +# mode: python +# End: Modified: lxml/trunk/bisect_crashes.py ============================================================================== --- lxml/trunk/bisect_crashes.py (original) +++ lxml/trunk/bisect_crashes.py Sun Aug 8 18:48:34 2010 @@ -11,6 +11,7 @@ sys.path.insert(1, test_base_path) import test +from DD import DD cfg = test.Options() cfg.verbosity = 0 @@ -27,46 +28,33 @@ test_files = test.get_test_files(cfg) return test.get_test_cases(test_files, cfg) -def run_tests(test_cases): - if not test_cases: - return True - write('Running subset of %d tests [%s .. %s]', - len(test_cases), test_cases[0].id(), test_cases[-1].id()) - pid = os.fork() - if not pid: - # child executes tests - runner = test.CustomTestRunner(cfg, None) - suite = unittest.TestSuite() - suite.addTests(test_cases) - os._exit( not runner.run(suite).wasSuccessful() ) - cid, retval = os.waitpid(pid, 0) - if retval: - write('exit status: %d, signal: %d', retval >> 8, retval % 0xFF) - return (retval % 0xFF) == 0 # signal +class DDTester(DD): + def _test(self, test_cases): + if not test_cases: + return self.PASS + test_cases = [ item[-1] for item in test_cases ] + write('Running subset of %d tests [%s .. %s]', + len(test_cases), test_cases[0].id(), test_cases[-1].id()) + pid = os.fork() + if not pid: + # child executes tests + runner = test.CustomTestRunner(cfg, None) + suite = unittest.TestSuite() + suite.addTests(test_cases) + os._exit( not runner.run(suite).wasSuccessful() ) + cid, retval = os.waitpid(pid, 0) + if retval: + write('exit status: %d, signal: %d', retval >> 8, retval % 0xFF) + if (retval % 0xFF) != 0: # signal received? 
+ return self.FAIL + return self.PASS -def bisect_tests(): +def dd_tests(): tests = find_tests() write('Found %d tests', len(tests)) - shift = len(tests) // 4 - failed = [] - while len(tests) > 1 and shift > 0: - mid = len(tests) // 2 + 1 - left, right = tests[:mid], tests[mid:] - - if not run_tests(left): - failed = left[:] - tests = left - shift = len(tests) // 4 + 1 - elif not run_tests(right): - failed = right[:] - tests = right - shift = len(tests) // 4 + 1 - else: - # retry - shift //= 2 - tests = tests[shift:] + tests[:shift] - # looks like we can't make the set of tests any smaller - return failed + dd = DDTester() + min_tests = dd.ddmin( list(enumerate(tests)) ) + return [ item[-1] for item in min_tests ] if __name__ == '__main__': - write('Failing tests:\n%s', '\n'.join([test.id() for test in bisect_tests()])) + write('Failing tests:\n%s', '\n'.join([test.id() for test in dd_tests()])) From scoder at codespeak.net Sun Aug 8 20:02:44 2010 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sun, 8 Aug 2010 20:02:44 +0200 (CEST) Subject: [Lxml-checkins] r76533 - lxml/trunk Message-ID: <20100808180244.8C669282B90@codespeak.net> Author: scoder Date: Sun Aug 8 20:02:41 2010 New Revision: 76533 Modified: lxml/trunk/DD.py lxml/trunk/bisect_crashes.py Log: fix DD.py for Python 3.x Modified: lxml/trunk/DD.py ============================================================================== --- lxml/trunk/DD.py (original) +++ lxml/trunk/DD.py Sun Aug 8 20:02:41 2010 @@ -42,7 +42,7 @@ # Start with some helpers. -class OutcomeCache: +class OutcomeCache(object): # This class holds test outcomes for configurations. This avoids # running the same test twice. 
@@ -67,20 +67,20 @@ cs.sort() p = self - for start in range(len(c)): - if not p.tail.has_key(c[start]): - p.tail[c[start]] = OutcomeCache() - p = p.tail[c[start]] + for start in c: + if start not in p.tail: + p.tail[start] = OutcomeCache() + p = p.tail[start] p.result = result def lookup(self, c): """Return RESULT if (C, RESULT) is in the cache; None, otherwise.""" p = self - for start in range(len(c)): - if not p.tail.has_key(c[start]): + for start in c: + if start not in p.tail: return None - p = p.tail[c[start]] + p = p.tail[start] return p.result @@ -94,12 +94,12 @@ return self.result elif self.tail != {}: # Select some superset - superset = self.tail[self.tail.keys()[0]] + superset = self.tail[list(self.tail.keys())[0]] return superset.lookup_superset(c, start + 1) else: return None - if self.tail.has_key(c[start]): + if c[start] in self.tail: return self.tail[c[start]].lookup_superset(c, start + 1) # Let K0 be the largest element in TAIL such that K0 <= C[START] @@ -118,7 +118,7 @@ C' being a subset of C or equal to C. Otherwise, return None.""" p = self for start in range(len(c)): - if p.tail.has_key(c[start]): + if c[start] in p.tail: p = p.tail[c[start]] return p.result @@ -174,7 +174,7 @@ # Main Delta Debugging algorithm. -class DD: +class DD(object): # Delta debugging base class. To use this class for a particular # setting, create a subclass with an overloaded `test()' method. # @@ -200,8 +200,8 @@ UNRESOLVED = "UNRESOLVED" # Resolving directions. 
- ADD = "ADD" # Add deltas to resolve - REMOVE = "REMOVE" # Remove deltas to resolve + ADD = "ADD" # Add deltas to resolve + REMOVE = "REMOVE" # Remove deltas to resolve # Debugging output (set to 1 to enable) debug_test = 0 @@ -210,8 +210,8 @@ debug_resolve = 0 def __init__(self): - self.__resolving = 0 - self.__last_reported_length = 0 + self.__resolving = 0 + self.__last_reported_length = 0 self.monotony = 0 self.outcome_cache = OutcomeCache() self.cache_outcomes = 1 @@ -221,43 +221,43 @@ # Helpers def __listminus(self, c1, c2): - """Return a list of all elements of C1 that are not in C2.""" + """Return a list of all elements of C1 that are not in C2.""" s2 = {} for delta in c2: s2[delta] = 1 - c = [] - for delta in c1: - if not s2.has_key(delta): - c.append(delta) + c = [] + for delta in c1: + if delta not in s2: + c.append(delta) - return c + return c def __listintersect(self, c1, c2): - """Return the common elements of C1 and C2.""" + """Return the common elements of C1 and C2.""" s2 = {} for delta in c2: s2[delta] = 1 - c = [] - for delta in c1: - if s2.has_key(delta): - c.append(delta) + c = [] + for delta in c1: + if delta in s2: + c.append(delta) - return c + return c def __listunion(self, c1, c2): - """Return the union of C1 and C2.""" + """Return the union of C1 and C2.""" s1 = {} for delta in c1: s1[delta] = 1 - c = c1[:] - for delta in c2: - if not s1.has_key(delta): - c.append(delta) + c = c1[:] + for delta in c2: + if delta not in s1: + c.append(delta) - return c + return c def __listsubseteq(self, c1, c2): """Return 1 if C1 is a subset or equal to C2.""" @@ -266,16 +266,16 @@ s2[delta] = 1 for delta in c1: - if not s2.has_key(delta): + if delta not in s2: return 0 return 1 # Output def coerce(self, c): - """Return the configuration C as a compact string""" - # Default: use printable representation - return `c` + """Return the configuration C as a compact string""" + # Default: use printable representation + return repr(c) def pretty(self, c): 
"""Like coerce(), but sort beforehand""" @@ -285,8 +285,8 @@ # Testing def test(self, c): - """Test the configuration C. Return PASS, FAIL, or UNRESOLVED""" - c.sort() + """Test the configuration C. Return PASS, FAIL, or UNRESOLVED""" + c.sort() # If we had this test before, return its result if self.cache_outcomes: @@ -304,94 +304,92 @@ if cached_result == self.FAIL: return self.FAIL - if self.debug_test: - print - print "test(" + self.coerce(c) + ")..." + if self.debug_test: + print('') + print("test(%s)..." % (self.coerce(c),)) - outcome = self._test(c) + outcome = self._test(c) - if self.debug_test: - print "test(" + self.coerce(c) + ") = " + `outcome` + if self.debug_test: + print("test(%s) = %r" % (self.coerce(c), outcome)) if self.cache_outcomes: self.outcome_cache.add(c, outcome) - return outcome + return outcome def _test(self, c): - """Stub to overload in subclasses""" - return self.UNRESOLVED # Placeholder + """Stub to overload in subclasses""" + return self.UNRESOLVED # Placeholder # Splitting def split(self, c, n): - """Split C into [C_1, C_2, ..., C_n].""" - if self.debug_split: - print "split(" + self.coerce(c) + ", " + `n` + ")..." + """Split C into [C_1, C_2, ..., C_n].""" + if self.debug_split: + print("split(%s, %r)..." 
% (self.coerce(c), n)) - outcome = self._split(c, n) + outcome = self._split(c, n) - if self.debug_split: - print "split(" + self.coerce(c) + ", " + `n` + ") = " + `outcome` + if self.debug_split: + print("split(%s, %r) = %r" % (self.coerce(c), n, outcome)) - return outcome + return outcome def _split(self, c, n): - """Stub to overload in subclasses""" - subsets = [] - start = 0 - for i in range(n): - subset = c[start:start + (len(c) - start) / (n - i)] - subsets.append(subset) - start = start + len(subset) - return subsets + """Stub to overload in subclasses""" + subsets = [] + start = 0 + for i in range(n): + subset = c[start:start + (len(c) - start) // (n - i)] + subsets.append(subset) + start = start + len(subset) + return subsets # Resolving def resolve(self, csub, c, direction): - """If direction == ADD, resolve inconsistency by adding deltas - to CSUB. Otherwise, resolve by removing deltas from CSUB.""" + """If direction == ADD, resolve inconsistency by adding deltas + to CSUB. Otherwise, resolve by removing deltas from CSUB.""" - if self.debug_resolve: - print "resolve(" + `csub` + ", " + self.coerce(c) + ", " + \ - `direction` + ")..." + if self.debug_resolve: + print("resolve(%r, %s, %r)..." 
% (csub, self.coerce(c), direction)) - outcome = self._resolve(csub, c, direction) + outcome = self._resolve(csub, c, direction) - if self.debug_resolve: - print "resolve(" + `csub` + ", " + self.coerce(c) + ", " + \ - `direction` + ") = " + `outcome` + if self.debug_resolve: + print("resolve(%r, %s, %r) = %r" % (csub, self.coerce(c), direction, outcome)) - return outcome + return outcome def _resolve(self, csub, c, direction): - """Stub to overload in subclasses.""" - # By default, no way to resolve - return None + """Stub to overload in subclasses.""" + # By default, no way to resolve + return None # Test with fixes def test_and_resolve(self, csub, r, c, direction): - """Repeat testing CSUB + R while unresolved.""" + """Repeat testing CSUB + R while unresolved.""" - initial_csub = csub[:] + initial_csub = csub[:] c2 = self.__listunion(r, c) csubr = self.__listunion(csub, r) - t = self.test(csubr) + t = self.test(csubr) # necessary to use more resolving mechanisms which can reverse each # other, can (but needn't) be used in subclasses self._resolve_type = 0 - while t == self.UNRESOLVED: - self.__resolving = 1 - csubr = self.resolve(csubr, c, direction) - - if csubr == None: - # Nothing left to resolve - break + while t == self.UNRESOLVED: + self.__resolving = 1 + csubr = self.resolve(csubr, c, direction) + + if csubr == None: + # Nothing left to resolve + break if len(csubr) >= len(c2): # Added everything: csub == c2. 
("Upper" Baseline) @@ -405,50 +403,49 @@ csubr = None break - t = self.test(csubr) + t = self.test(csubr) - self.__resolving = 0 - if csubr == None: - return self.UNRESOLVED, initial_csub + self.__resolving = 0 + if csubr == None: + return self.UNRESOLVED, initial_csub - # assert t == self.PASS or t == self.FAIL + # assert t == self.PASS or t == self.FAIL csub = self.__listminus(csubr, r) - return t, csub + return t, csub # Inquiries def resolving(self): - """Return 1 while resolving.""" - return self.__resolving + """Return 1 while resolving.""" + return self.__resolving # Logging def report_progress(self, c, title): - if len(c) != self.__last_reported_length: - print - print title + ": " + `len(c)` + " deltas left:", self.coerce(c) - self.__last_reported_length = len(c) + if len(c) != self.__last_reported_length: + print('') + print("%s: %d deltas left: %s" % (title, len(c), self.coerce(c))) + self.__last_reported_length = len(c) # Delta Debugging (old ESEC/FSE version) def old_dd(self, c, r = [], n = 2): - """Return the failure-inducing subset of C""" + """Return the failure-inducing subset of C""" assert self.test([]) == dd.PASS assert self.test(c) == dd.FAIL - if self.debug_dd: - print ("dd(" + self.pretty(c) + ", " + `r` + ", " + `n` + ")...") + if self.debug_dd: + print("dd(%s, %r, %r)..." 
% (self.pretty(c), r, n)) - outcome = self._old_dd(c, r, n) + outcome = self._old_dd(c, r, n) - if self.debug_dd: - print ("dd(" + self.pretty(c) + ", " + `r` + ", " + `n` + - ") = " + `outcome`) + if self.debug_dd: + print("dd(%s, %r, %r) = %r" % (self.pretty(c), r, n, outcome)) - return outcome + return outcome def _old_dd(self, c, r, n): - """Stub to overload in subclasses""" + """Stub to overload in subclasses""" if r == []: assert self.test([]) == self.PASS @@ -459,105 +456,97 @@ assert self.__listintersect(c, r) == [] - if len(c) == 1: - # Nothing to split - return c - - run = 1 - next_c = c[:] - next_r = r[:] - - # We replace the tail recursion from the paper by a loop - while 1: - self.report_progress(c, "dd") - - cs = self.split(c, n) - - print - print "dd (run #" + `run` + "): trying", - for i in range(n): - if i > 0: - print "+", - print len(cs[i]), - print - - # Check subsets - ts = [] - for i in range(n): + if len(c) == 1: + # Nothing to split + return c + + run = 1 + next_c = c[:] + next_r = r[:] + + # We replace the tail recursion from the paper by a loop + while 1: + self.report_progress(c, "dd") + + cs = self.split(c, n) + + print('') + print("dd (run #%r): trying %s" % (run, ' + '.join(map(str, cs)))) + print('') + + # Check subsets + ts = [] + for i in range(n): if self.debug_dd: - print "dd: trying cs[" + `i` + "] =", self.pretty(cs[i]) + print("dd: trying cs[%d] = %s" % (i, self.pretty(cs[i]))) - t, cs[i] = self.test_and_resolve(cs[i], r, c, self.REMOVE) - ts.append(t) - if t == self.FAIL: - # Found + t, cs[i] = self.test_and_resolve(cs[i], r, c, self.REMOVE) + ts.append(t) + if t == self.FAIL: + # Found if self.debug_dd: - print "dd: found", len(cs[i]), "deltas:", - print self.pretty(cs[i]) + print("dd: found %d deltas: %s" % (len(cs[i]), self.pretty(cs[i]))) return self.dd(cs[i], r) - # Check complements - cbars = [] - tbars = [] - - for i in range(n): - cbar = self.__listminus(c, cs[i] + r) - tbar, cbar = self.test_and_resolve(cbar, r, c, 
self.ADD) + # Check complements + cbars = [] + tbars = [] + + for i in range(n): + cbar = self.__listminus(c, cs[i] + r) + tbar, cbar = self.test_and_resolve(cbar, r, c, self.ADD) doubled = self.__listintersect(cbar, cs[i]) if doubled != []: - cs[i] = self.__listminus(cs[i], doubled) + cs[i] = self.__listminus(cs[i], doubled) - cbars.append(cbar) - tbars.append(tbar) + cbars.append(cbar) + tbars.append(tbar) - if ts[i] == self.PASS and tbars[i] == self.PASS: - # Interference + if ts[i] == self.PASS and tbars[i] == self.PASS: + # Interference if self.debug_dd: - print "dd: interference of", self.pretty(cs[i]), - print "and", self.pretty(cbars[i]) + print("dd: interference of %s and %s" % (self.pretty(cs[i]), self.pretty(cbars[i]))) - d = self.dd(cs[i][:], cbars[i] + r) - dbar = self.dd(cbars[i][:], cs[i] + r) - return d + dbar + d = self.dd(cs[i][:], cbars[i] + r) + dbar = self.dd(cbars[i][:], cs[i] + r) + return d + dbar - if ts[i] == self.UNRESOLVED and tbars[i] == self.PASS: - # Preference + if ts[i] == self.UNRESOLVED and tbars[i] == self.PASS: + # Preference if self.debug_dd: - print "dd: preferring", len(cs[i]), "deltas:", - print self.pretty(cs[i]) + print("dd: preferring %d deltas: %s" % (len(cs[i]), self.pretty(cs[i]))) - return self.dd(cs[i][:], cbars[i] + r) + return self.dd(cs[i][:], cbars[i] + r) - if ts[i] == self.PASS or tbars[i] == self.FAIL: + if ts[i] == self.PASS or tbars[i] == self.FAIL: if self.debug_dd: excluded = self.__listminus(next_c, cbars[i]) - print "dd: excluding", len(excluded), "deltas:", - print self.pretty(excluded) + print("dd: excluding %d deltas: %s" % (len(excluded), self.pretty(excluded))) if ts[i] == self.PASS: next_r = self.__listunion(next_r, cs[i]) - next_c = self.__listintersect(next_c, cbars[i]) - self.report_progress(next_c, "dd") + next_c = self.__listintersect(next_c, cbars[i]) + self.report_progress(next_c, "dd") next_n = min(len(next_c), n * 2) - if next_n == n and next_c[:] == c[:] and next_r[:] == r[:]: - # Nothing 
left + if next_n == n and next_c[:] == c[:] and next_r[:] == r[:]: + # Nothing left if self.debug_dd: - print "dd: nothing left" - return next_c + print("dd: nothing left") + return next_c # Try again if self.debug_dd: - print "dd: try again" + print("dd: try again") - c = next_c - r = next_r - n = next_n - run = run + 1 + c = next_c + r = next_r + n = next_n + run = run + 1 def test_mix(self, csub, c, direction): @@ -591,7 +580,7 @@ # Delta Debugging (new ISSTA version) def ddgen(self, c, minimize, maximize): - """Return a 1-minimal failing subset of C""" + """Return a 1-minimal failing subset of C""" self.minimize = minimize self.maximize = maximize @@ -599,45 +588,41 @@ n = 2 self.CC = c - if self.debug_dd: - print ("dd(" + self.pretty(c) + ", " + `n` + ")...") + if self.debug_dd: + print("dd(%s, %r)..." % (self.pretty(c), n)) - outcome = self._dd(c, n) + outcome = self._dd(c, n) - if self.debug_dd: - print ("dd(" + self.pretty(c) + ", " + `n` + ") = " + `outcome`) + if self.debug_dd: + print("dd(%s, %r) = %r" % (self.pretty(c), n, outcome)) - return outcome + return outcome def _dd(self, c, n): - """Stub to overload in subclasses""" + """Stub to overload in subclasses""" assert self.test([]) == self.PASS - run = 1 + run = 1 cbar_offset = 0 - # We replace the tail recursion from the paper by a loop - while 1: + # We replace the tail recursion from the paper by a loop + while 1: tc = self.test(c) assert tc == self.FAIL or tc == self.UNRESOLVED if n > len(c): # No further minimizing - print "dd: done" + print("dd: done") return c - self.report_progress(c, "dd") + self.report_progress(c, "dd") - cs = self.split(c, n) + cs = self.split(c, n) - print - print "dd (run #" + `run` + "): trying", - for i in range(n): - if i > 0: - print "+", - print len(cs[i]), - print + print('') + print("dd (run #%d): trying %s" % (run, ' + '.join(map(str, cs)))) + print('') c_failed = 0 cbar_failed = 0 @@ -645,18 +630,17 @@ next_c = c[:] next_n = n - # Check subsets - for i in 
range(n): + # Check subsets + for i in range(n): if self.debug_dd: - print "dd: trying", self.pretty(cs[i]) + print("dd: trying %s" % (self.pretty(cs[i]),)) (t, cs[i]) = self.test_mix(cs[i], c, self.REMOVE) if t == self.FAIL: # Found if self.debug_dd: - print "dd: found", len(cs[i]), "deltas:", - print self.pretty(cs[i]) + print("dd: found %d deltas: %s" % (len(cs[i]), self.pretty(cs[i]))) c_failed = 1 next_c = cs[i] @@ -672,7 +656,7 @@ # print "cbar_offset =", cbar_offset for j in range(n): - i = (j + cbar_offset) % n + i = int((j + cbar_offset) % n) cbars[i] = self.__listminus(c, cs[i]) t, cbars[i] = self.test_mix(cbars[i], c, self.ADD) @@ -682,9 +666,7 @@ if t == self.FAIL: if self.debug_dd: - print "dd: reduced to", len(cbars[i]), - print "deltas:", - print self.pretty(cbars[i]) + print("dd: reduced to %d deltas: %s" % (len(cbars[i]), self.pretty(cbars[i]))) cbar_failed = 1 next_c = self.__listintersect(next_c, cbars[i]) @@ -698,16 +680,16 @@ if not c_failed and not cbar_failed: if n >= len(c): # No further minimizing - print "dd: done" + print("dd: done") return c next_n = min(len(c), n * 2) - print "dd: increase granularity to", next_n + print("dd: increase granularity to %d" % next_n) cbar_offset = (cbar_offset * next_n) / n c = next_c n = next_n - run = run + 1 + run = run + 1 def ddmin(self, c): return self.ddgen(c, 1, 0) @@ -723,26 +705,25 @@ def dddiff(self, c): n = 2 - if self.debug_dd: - print ("dddiff(" + self.pretty(c) + ", " + `n` + ")...") + if self.debug_dd: + print("dddiff(%s, %d)..." 
% (self.pretty(c), n)) - outcome = self._dddiff([], c, n) + outcome = self._dddiff([], c, n) - if self.debug_dd: - print ("dddiff(" + self.pretty(c) + ", " + `n` + ") = " + - `outcome`) + if self.debug_dd: + print("dddiff(%s, %d) = %r" % (self.pretty(c), n, outcome)) - return outcome + return outcome def _dddiff(self, c1, c2, n): - run = 1 + run = 1 cbar_offset = 0 - # We replace the tail recursion from the paper by a loop - while 1: + # We replace the tail recursion from the paper by a loop + while 1: if self.debug_dd: - print "dd: c1 =", self.pretty(c1) - print "dd: c2 =", self.pretty(c2) + print("dd: c1 = %s" % (self.pretty(c1),)) + print("dd: c2 = %s" % (self.pretty(c2),)) if self.assume_axioms_hold: t1 = self.PASS @@ -758,24 +739,20 @@ c = self.__listminus(c2, c1) if self.debug_dd: - print "dd: c2 - c1 =", self.pretty(c) + print("dd: c2 - c1 = %s" % (self.pretty(c),)) if n > len(c): # No further minimizing - print "dd: done" + print("dd: done") return (c, c1, c2) - self.report_progress(c, "dd") + self.report_progress(c, "dd") - cs = self.split(c, n) + cs = self.split(c, n) - print - print "dd (run #" + `run` + "): trying", - for i in range(n): - if i > 0: - print "+", - print len(cs[i]), - print + print('') + print("dd (run #%d): trying %s" % (run, ' + '.join(map(str, cs)))) + print('') progress = 0 @@ -783,12 +760,12 @@ next_c2 = c2[:] next_n = n - # Check subsets + # Check subsets for j in range(n): - i = (j + cbar_offset) % n + i = int((j + cbar_offset) % n) if self.debug_dd: - print "dd: trying", self.pretty(cs[i]) + print("dd: trying %s" % (self.pretty(cs[i]),)) (t, csub) = self.test_and_resolve(cs[i], c1, c, self.REMOVE) csub = self.__listunion(c1, csub) @@ -801,8 +778,7 @@ cbar_offset = 0 if self.debug_dd: - print "dd: reduce c2 to", len(next_c2), "deltas:", - print self.pretty(next_c2) + print("dd: reduce c2 to %d deltas: %s" % (len(next_c2), self.pretty(next_c2))) break if t == self.PASS and t2 == self.FAIL: @@ -813,8 +789,7 @@ cbar_offset = i if 
self.debug_dd: - print "dd: increase c1 to", len(next_c1), "deltas:", - print self.pretty(next_c1) + print("dd: increase c1 to %d deltas: %s", (len(next_c1), self.pretty(next_c1))) break @@ -830,8 +805,7 @@ cbar_offset = 0 if self.debug_dd: - print "dd: increase c1 to", len(next_c1), "deltas:", - print self.pretty(next_c1) + print("dd: increase c1 to %d deltas: %s" % (len(next_c1), self.pretty(next_c1))) break if t == self.FAIL and t1 == self.PASS: @@ -842,8 +816,7 @@ cbar_offset = i if self.debug_dd: - print "dd: reduce c2 to", len(next_c2), "deltas:", - print self.pretty(next_c2) + print("dd: reduce c2 to %d deltas: %s" % (len(next_c2), self.pretty(next_c2))) break if progress: @@ -851,22 +824,22 @@ else: if n >= len(c): # No further minimizing - print "dd: done" + print("dd: done") return (c, c1, c2) next_n = min(len(c), n * 2) - print "dd: increase granularity to", next_n + print("dd: increase granularity to %d" % next_n) cbar_offset = (cbar_offset * next_n) / n c1 = next_c1 c2 = next_c2 n = next_n - run = run + 1 + run = run + 1 def dd(self, c): return self.dddiff(c) # Backwards compatibility - + @@ -876,65 +849,65 @@ # Define our own DD class, with its own test method class MyDD(DD): - def _test_a(self, c): - "Test the configuration C. Return PASS, FAIL, or UNRESOLVED." + def _test_a(self, c): + "Test the configuration C. Return PASS, FAIL, or UNRESOLVED." 
- # Just a sample - # if 2 in c and not 3 in c: - # return self.UNRESOLVED - # if 3 in c and not 7 in c: + # Just a sample + # if 2 in c and not 3 in c: + # return self.UNRESOLVED + # if 3 in c and not 7 in c: # return self.UNRESOLVED - if 7 in c and not 2 in c: - return self.UNRESOLVED - if 5 in c and 8 in c: - return self.FAIL - return self.PASS - - def _test_b(self, c): - if c == []: - return self.PASS - if 1 in c and 2 in c and 3 in c and 4 in c and \ - 5 in c and 6 in c and 7 in c and 8 in c: - return self.FAIL - return self.UNRESOLVED - - def _test_c(self, c): - if 1 in c and 2 in c and 3 in c and 4 in c and \ - 6 in c and 8 in c: + if 7 in c and not 2 in c: + return self.UNRESOLVED + if 5 in c and 8 in c: + return self.FAIL + return self.PASS + + def _test_b(self, c): + if c == []: + return self.PASS + if 1 in c and 2 in c and 3 in c and 4 in c and \ + 5 in c and 6 in c and 7 in c and 8 in c: + return self.FAIL + return self.UNRESOLVED + + def _test_c(self, c): + if 1 in c and 2 in c and 3 in c and 4 in c and \ + 6 in c and 8 in c: if 5 in c and 7 in c: return self.UNRESOLVED else: return self.FAIL - if 1 in c or 2 in c or 3 in c or 4 in c or \ - 6 in c or 8 in c: + if 1 in c or 2 in c or 3 in c or 4 in c or \ + 6 in c or 8 in c: return self.UNRESOLVED return self.PASS - def __init__(self): - self._test = self._test_c + def __init__(self): + self._test = self._test_c DD.__init__(self) - print "WYNOT - a tool for delta debugging." 
+ print("WYNOT - a tool for delta debugging.") mydd = MyDD() - # mydd.debug_test = 1 # Enable debugging output - # mydd.debug_dd = 1 # Enable debugging output - # mydd.debug_split = 1 # Enable debugging output - # mydd.debug_resolve = 1 # Enable debugging output + # mydd.debug_test = 1 # Enable debugging output + # mydd.debug_dd = 1 # Enable debugging output + # mydd.debug_split = 1 # Enable debugging output + # mydd.debug_resolve = 1 # Enable debugging output # mydd.cache_outcomes = 0 # mydd.monotony = 0 - print "Minimizing failure-inducing input..." + print("Minimizing failure-inducing input...") c = mydd.ddmin([1, 2, 3, 4, 5, 6, 7, 8]) # Invoke DDMIN - print "The 1-minimal failure-inducing input is", c - print "Removing any element will make the failure go away." - print + print("The 1-minimal failure-inducing input is %s" % (c,)) + print("Removing any element will make the failure go away.") + print('') - print "Computing the failure-inducing difference..." - (c, c1, c2) = mydd.dd([1, 2, 3, 4, 5, 6, 7, 8]) # Invoke DD - print "The 1-minimal failure-inducing difference is", c - print c1, "passes,", c2, "fails" + print("Computing the failure-inducing difference...") + (c, c1, c2) = mydd.dd([1, 2, 3, 4, 5, 6, 7, 8]) # Invoke DD + print("The 1-minimal failure-inducing difference is %s" % (c,)) + print("%s passes, %s fails" % (c1, c2)) Modified: lxml/trunk/bisect_crashes.py ============================================================================== --- lxml/trunk/bisect_crashes.py (original) +++ lxml/trunk/bisect_crashes.py Sun Aug 8 20:02:41 2010 @@ -33,8 +33,8 @@ if not test_cases: return self.PASS test_cases = [ item[-1] for item in test_cases ] - write('Running subset of %d tests [%s .. 
%s]', - len(test_cases), test_cases[0].id(), test_cases[-1].id()) + write('Running subset of %d tests %s', + len(test_cases), self.coerce(test_cases)) pid = os.fork() if not pid: # child executes tests @@ -49,6 +49,11 @@ return self.FAIL return self.PASS + def coerce(self, test_cases): + if not test_cases: + return '[]' + return '[%s .. %s]' % (test_cases[0].id(), test_cases[-1].id()) + def dd_tests(): tests = find_tests() write('Found %d tests', len(tests)) From scoder at codespeak.net Sun Aug 8 20:08:03 2010 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sun, 8 Aug 2010 20:08:03 +0200 (CEST) Subject: [Lxml-checkins] r76534 - lxml/trunk Message-ID: <20100808180803.02B6B282B90@codespeak.net> Author: scoder Date: Sun Aug 8 20:07:59 2010 New Revision: 76534 Modified: lxml/trunk/bisect_crashes.py Log: fix bisect test output Modified: lxml/trunk/bisect_crashes.py ============================================================================== --- lxml/trunk/bisect_crashes.py (original) +++ lxml/trunk/bisect_crashes.py Sun Aug 8 20:07:59 2010 @@ -32,9 +32,9 @@ def _test(self, test_cases): if not test_cases: return self.PASS - test_cases = [ item[-1] for item in test_cases ] write('Running subset of %d tests %s', len(test_cases), self.coerce(test_cases)) + test_cases = [ item[-1] for item in test_cases ] pid = os.fork() if not pid: # child executes tests @@ -52,6 +52,7 @@ def coerce(self, test_cases): if not test_cases: return '[]' + test_cases = [ item[-1] for item in test_cases ] return '[%s .. 
%s]' % (test_cases[0].id(), test_cases[-1].id()) def dd_tests(): From scoder at codespeak.net Sun Aug 8 20:34:06 2010 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sun, 8 Aug 2010 20:34:06 +0200 (CEST) Subject: [Lxml-checkins] r76535 - lxml/trunk/src/lxml/tests Message-ID: <20100808183406.0E31B282B90@codespeak.net> Author: scoder Date: Sun Aug 8 20:34:02 2010 New Revision: 76535 Modified: lxml/trunk/src/lxml/tests/test_elementtree.py lxml/trunk/src/lxml/tests/test_etree.py Log: disable test for ET that fails in Py3 Modified: lxml/trunk/src/lxml/tests/test_elementtree.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_elementtree.py (original) +++ lxml/trunk/src/lxml/tests/test_elementtree.py Sun Aug 8 20:34:02 2010 @@ -526,20 +526,6 @@ except AssertionError: self.assertEquals(alternative, str(root.attrib)) - def test_attribute_has_key(self): - XML = self.etree.XML - - root = XML(_bytes('')) - self.assertEquals( - True, root.attrib.has_key('bar')) - self.assertEquals( - False, root.attrib.has_key('baz')) - self.assertEquals( - False, root.attrib.has_key('hah')) - self.assertEquals( - True, - root.attrib.has_key('{http://ns.codespeak.net/test}baz')) - def test_attribute_contains(self): XML = self.etree.XML Modified: lxml/trunk/src/lxml/tests/test_etree.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_etree.py (original) +++ lxml/trunk/src/lxml/tests/test_etree.py Sun Aug 8 20:34:02 2010 @@ -207,6 +207,21 @@ self.assertRaises(ValueError, etree.Element, "root", nsmap={'a:b' : 'testns'}) + def test_attribute_has_key(self): + # ET in Py 3.x has no "attrib.has_key()" method + XML = self.etree.XML + + root = XML(_bytes('')) + self.assertEquals( + True, root.attrib.has_key('bar')) + self.assertEquals( + False, root.attrib.has_key('baz')) + self.assertEquals( + False, root.attrib.has_key('hah')) + self.assertEquals( + True, + 
root.attrib.has_key('{http://ns.codespeak.net/test}baz')) + def test_attribute_set(self): Element = self.etree.Element root = Element("root") From scoder at codespeak.net Sun Aug 8 20:34:07 2010 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sun, 8 Aug 2010 20:34:07 +0200 (CEST) Subject: [Lxml-checkins] r76536 - lxml/trunk Message-ID: <20100808183407.DB032282BD4@codespeak.net> Author: scoder Date: Sun Aug 8 20:34:06 2010 New Revision: 76536 Modified: lxml/trunk/bisect_crashes.py Log: ignore more signals when checking for crashes Modified: lxml/trunk/bisect_crashes.py ============================================================================== --- lxml/trunk/bisect_crashes.py (original) +++ lxml/trunk/bisect_crashes.py Sun Aug 8 20:34:06 2010 @@ -45,7 +45,7 @@ cid, retval = os.waitpid(pid, 0) if retval: write('exit status: %d, signal: %d', retval >> 8, retval % 0xFF) - if (retval % 0xFF) != 0: # signal received? + if (retval % 0xFF) > 2: # signal received? return self.FAIL return self.PASS From scoder at codespeak.net Sun Aug 8 21:19:03 2010 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sun, 8 Aug 2010 21:19:03 +0200 (CEST) Subject: [Lxml-checkins] r76537 - lxml/trunk/src/lxml/tests Message-ID: <20100808191903.78F39282B90@codespeak.net> Author: scoder Date: Sun Aug 8 21:18:31 2010 New Revision: 76537 Modified: lxml/trunk/src/lxml/tests/test_elementtree.py Log: Py3 test fix Modified: lxml/trunk/src/lxml/tests/test_elementtree.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_elementtree.py (original) +++ lxml/trunk/src/lxml/tests/test_elementtree.py Sun Aug 8 21:18:31 2010 @@ -2195,7 +2195,7 @@ self.assertEquals( [['{ns3}a3'], ['{ns2}a2'], ['{ns1}a1']], - [ child.attrib.keys() for child in a ]) + [ list(child.attrib.keys()) for child in a ]) def test_setslice_all_replace_reversed_ns2(self): Element = self.etree.Element @@ -2217,7 +2217,7 @@ self.assertEquals( 
[['{ns}a3'], ['{ns}a2'], ['{ns}a1']], - [ child.attrib.keys() for child in a ]) + [ list(child.attrib.keys()) for child in a ]) def test_setslice_end(self): Element = self.etree.Element From scoder at codespeak.net Sun Aug 8 21:20:34 2010 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sun, 8 Aug 2010 21:20:34 +0200 (CEST) Subject: [Lxml-checkins] r76538 - in lxml/trunk: . src/lxml src/lxml/tests Message-ID: <20100808192034.8713C282B90@codespeak.net> Author: scoder Date: Sun Aug 8 21:20:17 2010 New Revision: 76538 Modified: lxml/trunk/CHANGES.txt lxml/trunk/src/lxml/iterparse.pxi lxml/trunk/src/lxml/tests/test_etree.py Log: fix searching for wildcard tags in Py3 Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Sun Aug 8 21:20:17 2010 @@ -11,6 +11,8 @@ Bugs fixed ---------- +* Searching for wildcard tags in ``iterparse()`` was broken in Py3. + * ``lxml.html.open_in_browser()`` didn't work in Python 3 due to the use of os.tempnam. It now takes an optional 'encoding' parameter. 
Modified: lxml/trunk/src/lxml/iterparse.pxi ============================================================================== --- lxml/trunk/src/lxml/iterparse.pxi (original) +++ lxml/trunk/src/lxml/iterparse.pxi Sun Aug 8 21:20:17 2010 @@ -127,11 +127,11 @@ else: self._tag_tuple = _getNsTag(tag) href, name = self._tag_tuple - if href is None or href == '*': + if href is None or href == b'*': self._tag_href = NULL else: self._tag_href = _cstr(href) - if name is None or name == '*': + if name is None or name == b'*': self._tag_name = NULL else: self._tag_name = _cstr(name) @@ -562,11 +562,11 @@ else: self._tag_tuple = _getNsTag(tag) href, name = self._tag_tuple - if href is None or href == u'*': + if href is None or href == b'*': self._tag_href = NULL else: self._tag_href = _cstr(href) - if name is None or name == u'*': + if name is None or name == b'*': self._tag_name = NULL else: self._tag_name = _cstr(name) Modified: lxml/trunk/src/lxml/tests/test_etree.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_etree.py (original) +++ lxml/trunk/src/lxml/tests/test_etree.py Sun Aug 8 21:20:17 2010 @@ -649,6 +649,27 @@ 8, len(events)) + def test_iterparse_tag_ns(self): + iterparse = self.etree.iterparse + f = BytesIO('') + + iterator = iterparse(f, tag="{urn:test:1}b", events=('start', 'end')) + events = list(iterator) + root = iterator.root + self.assertEquals( + [('start', root[0]), ('end', root[0])], + events) + + def test_iterparse_tag_ns_all(self): + iterparse = self.etree.iterparse + f = BytesIO('') + + iterator = iterparse(f, tag="{urn:test:1}*", events=('start', 'end')) + events = list(iterator) + self.assertEquals( + 8, + len(events)) + def test_iterparse_encoding_error(self): text = _str('S?k p? 
nettet') wrong_declaration = "" From scoder at codespeak.net Mon Aug 9 08:37:12 2010 From: scoder at codespeak.net (scoder at codespeak.net) Date: Mon, 9 Aug 2010 08:37:12 +0200 (CEST) Subject: [Lxml-checkins] r76539 - in lxml/trunk: . src/lxml Message-ID: <20100809063712.650C1282B90@codespeak.net> Author: scoder Date: Mon Aug 9 08:37:09 2010 New Revision: 76539 Modified: lxml/trunk/CHANGES.txt lxml/trunk/src/lxml/xmlid.pxi Log: add missing 'parser' and 'base_url' parameters to XMLID() Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Mon Aug 9 08:37:09 2010 @@ -11,6 +11,9 @@ Bugs fixed ---------- +* ``XMLID()`` function was missing the optional ``parser`` and + ``base_url`` parameters. + * Searching for wildcard tags in ``iterparse()`` was broken in Py3. * ``lxml.html.open_in_browser()`` didn't work in Python 3 due to the Modified: lxml/trunk/src/lxml/xmlid.pxi ============================================================================== --- lxml/trunk/src/lxml/xmlid.pxi (original) +++ lxml/trunk/src/lxml/xmlid.pxi Mon Aug 9 08:37:09 2010 @@ -1,7 +1,7 @@ cdef object _find_id_attributes -def XMLID(text): - u"""XMLID(text) +def XMLID(text, parser=None, *, base_url=None): + u"""XMLID(text, parser=None, base_url=None) Parse the text and return a tuple (root node, ID dictionary). The root node is the same as returned by the XML() function. 
The dictionary @@ -15,14 +15,14 @@ _find_id_attributes = XPath(u'//*[string(@id)]') # ElementTree compatible implementation: parse and look for 'id' attributes - root = XML(text) + root = XML(text, parser, base_url=base_url) dic = {} for elem in _find_id_attributes(root): dic[elem.get(u'id')] = elem return (root, dic) -def XMLDTDID(text): - u"""XMLDTDID(text) +def XMLDTDID(text, parser=None, *, base_url=None): + u"""XMLDTDID(text, parser=None, base_url=None) Parse the text and return a tuple (root node, ID dictionary). The root node is the same as returned by the XML() function. The dictionary @@ -34,7 +34,7 @@ The results are undefined. """ cdef _Element root - root = XML(text) + root = XML(text, parser, base_url=base_url) # xml:id spec compatible implementation: use DTD ID attributes from libxml2 if root._doc._c_doc.ids is NULL: return (root, {}) From lxml-checkins at codespeak.net Mon Aug 9 14:15:42 2010 From: lxml-checkins at codespeak.net (lxml-checkins at codespeak.net) Date: Mon, 9 Aug 2010 14:15:42 +0200 (CEST) Subject: [Lxml-checkins] lxml-checkins@codespeak.net 65% OFF on Pfizer! Message-ID: <20100809121542.C2D90282C01@codespeak.net> http://groups.yahoo.com/group/adomdeutschera/message From lxml-checkins at codespeak.net Mon Aug 9 16:36:04 2010 From: lxml-checkins at codespeak.net (lxml-checkins at codespeak.net) Date: Mon, 9 Aug 2010 16:36:04 +0200 (CEST) Subject: [Lxml-checkins] lxml-checkins@codespeak.net 63% OFF on Pfizer! Message-ID: <20100809143604.0C114282B9C@codespeak.net> http://groups.yahoo.com/group/nehwlruxlmyb/message From lxml-checkins at codespeak.net Mon Aug 16 10:22:21 2010 From: lxml-checkins at codespeak.net (lxml-checkins at codespeak.net) Date: Mon, 16 Aug 2010 10:22:21 +0200 (CEST) Subject: lxml-checkins@codespeak.net VIAGRA ® Official Seller -43% Message-ID: <20100816081854.2902.qmail@77-52-40-70.dialup.umc.net.ua> Dear lxml-checkins at codespeak.net Get ready to make her happy. 
Discount price store: ID3768035 http://groups.yahoo.com/group/qjttrjghait/message We do guarantee high-quality medications, instant worldwide delivery and friendly support. ? 2001-2010 Pfizer Inc. All rights reserved. From scoder at codespeak.net Wed Aug 18 14:36:46 2010 From: scoder at codespeak.net (scoder at codespeak.net) Date: Wed, 18 Aug 2010 14:36:46 +0200 (CEST) Subject: [Lxml-checkins] r76663 - lxml/branch/lxml-2.2 Message-ID: <20100818123646.A271F282BEF@codespeak.net> Author: scoder Date: Wed Aug 18 14:36:44 2010 New Revision: 76663 Modified: lxml/branch/lxml-2.2/buildlibxml.py Log: import fix for Py3.1+ Modified: lxml/branch/lxml-2.2/buildlibxml.py ============================================================================== --- lxml/branch/lxml-2.2/buildlibxml.py (original) +++ lxml/branch/lxml-2.2/buildlibxml.py Wed Aug 18 14:36:44 2010 @@ -5,7 +5,7 @@ from urlparse import urlsplit, urljoin from urllib import urlretrieve except ImportError: - from urllib.parse import urlsplit + from urllib.parse import urlsplit, urljoin from urllib.request import urlretrieve ## Routines to download and build libxml2/xslt: From scoder at codespeak.net Tue Aug 24 12:40:39 2010 From: scoder at codespeak.net (scoder at codespeak.net) Date: Tue, 24 Aug 2010 12:40:39 +0200 (CEST) Subject: [Lxml-checkins] r76703 - lxml/trunk/src/lxml Message-ID: <20100824104039.CFDA5282B90@codespeak.net> Author: scoder Date: Tue Aug 24 12:40:34 2010 New Revision: 76703 Modified: lxml/trunk/src/lxml/lxml.etree.pyx Log: minor fix to avoid an unnecessary global reference Modified: lxml/trunk/src/lxml/lxml.etree.pyx ============================================================================== --- lxml/trunk/src/lxml/lxml.etree.pyx (original) +++ lxml/trunk/src/lxml/lxml.etree.pyx Tue Aug 24 12:40:34 2010 @@ -179,8 +179,8 @@ else: self.error_log = error_log.copy() -cdef object _Error = Error -cdef object error_super_init = Error.__init__ +cdef object _Error = Error if python.PY_VERSION_HEX >= 
0x02050000 else None +cdef object error_super_init = Error.__init__ if python.PY_VERSION_HEX < 0x02050000 else None # superclass for all syntax errors From scoder at codespeak.net Tue Aug 24 12:40:41 2010 From: scoder at codespeak.net (scoder at codespeak.net) Date: Tue, 24 Aug 2010 12:40:41 +0200 (CEST) Subject: [Lxml-checkins] r76704 - lxml/trunk/src/lxml Message-ID: <20100824104041.0B8D9282BAD@codespeak.net> Author: scoder Date: Tue Aug 24 12:40:40 2010 New Revision: 76704 Modified: lxml/trunk/src/lxml/proxy.pxi Log: fix attribute names also for XInclude start nodes Modified: lxml/trunk/src/lxml/proxy.pxi ============================================================================== --- lxml/trunk/src/lxml/proxy.pxi (original) +++ lxml/trunk/src/lxml/proxy.pxi Tue Aug 24 12:40:40 2010 @@ -423,7 +423,7 @@ tree.BEGIN_FOR_EACH_FROM(c_element, c_node, 1) if c_node.name is not NULL: fixThreadDictNameForNode(c_node, c_src_dict, c_dict) - if c_node.type == tree.XML_ELEMENT_NODE: + if c_node.type in (tree.XML_ELEMENT_NODE, tree.XML_XINCLUDE_START): fixThreadDictNamesForAttributes( c_node.properties, c_src_dict, c_dict) elif c_node.type == tree.XML_TEXT_NODE: From scoder at codespeak.net Tue Aug 24 12:55:36 2010 From: scoder at codespeak.net (scoder at codespeak.net) Date: Tue, 24 Aug 2010 12:55:36 +0200 (CEST) Subject: [Lxml-checkins] r76705 - in lxml/branch/lxml-2.2: . 
src/lxml Message-ID: <20100824105536.25EAD282BD4@codespeak.net> Author: scoder Date: Tue Aug 24 12:55:10 2010 New Revision: 76705 Modified: lxml/branch/lxml-2.2/CHANGES.txt lxml/branch/lxml-2.2/src/lxml/proxy.pxi Log: fix crash due to recent XInclude changes in libxml2 Modified: lxml/branch/lxml-2.2/CHANGES.txt ============================================================================== --- lxml/branch/lxml-2.2/CHANGES.txt (original) +++ lxml/branch/lxml-2.2/CHANGES.txt Tue Aug 24 12:55:10 2010 @@ -2,6 +2,16 @@ lxml changelog ============== +2.2.8 (2010-) +================== + +Bugs fixed +---------- + +* Crash in newer libxml2 versions when moving elements between + documents that had attributes on replaced XInclude nodes. + + 2.2.7 (2010-07-24) ================== Modified: lxml/branch/lxml-2.2/src/lxml/proxy.pxi ============================================================================== --- lxml/branch/lxml-2.2/src/lxml/proxy.pxi (original) +++ lxml/branch/lxml-2.2/src/lxml/proxy.pxi Tue Aug 24 12:55:10 2010 @@ -420,9 +420,10 @@ tree.BEGIN_FOR_EACH_FROM(c_element, c_node, 1) if c_node.name is not NULL: fixThreadDictNameForNode(c_node, c_src_dict, c_dict) - if c_node.type == tree.XML_ELEMENT_NODE: + if c_node.type == tree.XML_ELEMENT_NODE or c_node.type == tree.XML_XINCLUDE_START: fixThreadDictNamesForAttributes( c_node.properties, c_src_dict, c_dict) + fixThreadDictNsForNode(c_node, c_src_dict, c_dict) elif c_node.type == tree.XML_TEXT_NODE: # libxml2's SAX2 parser interns some indentation space fixThreadDictContentForNode(c_node, c_src_dict, c_dict) From scoder at codespeak.net Tue Aug 24 13:20:14 2010 From: scoder at codespeak.net (scoder at codespeak.net) Date: Tue, 24 Aug 2010 13:20:14 +0200 (CEST) Subject: [Lxml-checkins] r76706 - lxml/trunk/src/lxml Message-ID: <20100824112014.8FE45282B90@codespeak.net> Author: scoder Date: Tue Aug 24 13:20:10 2010 New Revision: 76706 Modified: lxml/trunk/src/lxml/proxy.pxi Log: also fix up namespace declarations for 
all elements and XInclude start nodes Modified: lxml/trunk/src/lxml/proxy.pxi ============================================================================== --- lxml/trunk/src/lxml/proxy.pxi (original) +++ lxml/trunk/src/lxml/proxy.pxi Tue Aug 24 13:20:10 2010 @@ -426,6 +426,7 @@ if c_node.type in (tree.XML_ELEMENT_NODE, tree.XML_XINCLUDE_START): fixThreadDictNamesForAttributes( c_node.properties, c_src_dict, c_dict) + fixThreadDictNsForNode(c_node, c_src_dict, c_dict) elif c_node.type == tree.XML_TEXT_NODE: # libxml2's SAX2 parser interns some indentation space fixThreadDictContentForNode(c_node, c_src_dict, c_dict) From scoder at codespeak.net Tue Aug 24 13:20:32 2010 From: scoder at codespeak.net (scoder at codespeak.net) Date: Tue, 24 Aug 2010 13:20:32 +0200 (CEST) Subject: [Lxml-checkins] r76707 - lxml/trunk Message-ID: <20100824112032.5A1AB282B90@codespeak.net> Author: scoder Date: Tue Aug 24 13:20:24 2010 New Revision: 76707 Modified: lxml/trunk/CHANGES.txt Log: changelog Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Tue Aug 24 13:20:24 2010 @@ -11,6 +11,9 @@ Bugs fixed ---------- +* Crash in newer libxml2 versions when moving elements between + documents that had attributes on replaced XInclude nodes. + * ``XMLID()`` function was missing the optional ``parser`` and ``base_url`` parameters. From lxml-checkins at codespeak.net Wed Aug 25 23:10:28 2010 From: lxml-checkins at codespeak.net (lxml-checkins at codespeak.net) Date: Wed, 25 Aug 2010 23:10:28 +0200 (CEST) Subject: lxml-checkins@codespeak.net V|AGRA ® Official Seller -71% Message-ID: <20100825211028.9F978282BAD@codespeak.net> An HTML attachment was scrubbed... URL: http://codespeak.net/pipermail/lxml-checkins/attachments/20100825/34b83c5a/attachment.htm