From jjlee at codespeak.net Sun Oct 1 19:51:01 2006 From: jjlee at codespeak.net (jjlee at codespeak.net) Date: Sun, 1 Oct 2006 19:51:01 +0200 (CEST) Subject: [wwwsearch-commits] r32793 - wwwsearch/release_scripts Message-ID: <20061001175101.A90761005A@code0.codespeak.net> Author: jjlee Date: Sun Oct 1 19:50:59 2006 New Revision: 32793 Modified: wwwsearch/release_scripts/colorize.py Log: Add coverage support from an ASPN recipe Modified: wwwsearch/release_scripts/colorize.py ============================================================================== --- wwwsearch/release_scripts/colorize.py (original) +++ wwwsearch/release_scripts/colorize.py Sun Oct 1 19:50:59 2006 @@ -2,13 +2,23 @@ Taken from Python Cookbook (originally from MoinMoin Python Source Parser). +HTML code coverage support: + +Original recipe: + http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/52298 + +Original Authors: + - Jürgen Hermann + - Mike Brown + - Christopher Arndt + Hacked a bit by John J Lee . Reduced the amount of markup generated, and I forget what else... """ # Imports -import cgi, string, sys, cStringIO +import os, cgi, string, sys, cStringIO import keyword, token, tokenize @@ -69,11 +79,13 @@ """ - def __init__(self, raw, out = sys.stdout): + def __init__(self, raw, out = sys.stdout, not_covered=[]): """ Store the source text. """ self.raw = string.expandtabs(raw).rstrip() self.out = out + self.not_covered = not_covered # not covered list of lines + self.cover_flag = False # is there a tag opened? def format(self, formatter, form): """ Parse and send the colored source. 
@@ -112,10 +124,17 @@ newpos = self.lines[srow] + scol self.pos = newpos + len(toktext) + if not self.cover_flag and srow in self.not_covered: + self.out.write('') + self.cover_flag = True + # handle newlines if toktype in [token.NEWLINE, tokenize.NL]: - self.out.write('\n') - return + if self.cover_flag: + self.out.write('') + self.cover_flag = False +## self.out.write('\n') +## return # send the original whitespace, if needed if newpos > oldpos: @@ -144,6 +163,73 @@ if color != "py": self.out.write('') +# code coverage +# -------------------------------------------------------------------- + +_HTML_HEADER = """\ + + + +code coverage of %(title)s + + + + + + +""" + +_HTML_FOOTER = """\ + + +""" + +class MissingList(list): + def __init__(self, i): + list.__init__(self, i) + + def __contains__(self, elem): + for i in list.__iter__(self): + v_ = m_ = s_ = None + try: + v_ = int(i) + except ValueError: + m_, s_ = i.split('-') + if v_ is not None and v_ == elem: + return True + elif (m_ is not None) and (s_ is not None) and \ + (int(m_) <= elem) and (elem <= int(s_)): + return True + return False + +def colorize_file(filename, outstream=sys.stdout, not_covered=[]): + """ + Convert a python source file into colorized HTML. + + Reads file and writes to outstream (default sys.stdout). + """ + fo = file(filename, 'rb') + try: + source = fo.read() + finally: + fo.close() + outstream.write(_HTML_HEADER % {'title': os.path.basename(filename)}) + Parser(source, out=outstream, + not_covered=MissingList((not_covered and \ + not_covered.split(', ')) or \ + [])).format(None, None) + outstream.write(_HTML_FOOTER) + + +# -------------------------------------------------------------------- + def test_main(): import doctest From jjlee at codespeak.net Sun Oct 1 20:38:38 2006 From: jjlee at codespeak.net (jjlee at codespeak.net) Date: Sun, 1 Oct 2006 20:38:38 +0200 (CEST) Subject: [wwwsearch-commits] r32797 - in wwwsearch/mechanize/trunk: . 
mechanize Message-ID: <20061001183838.B9D181005A@code0.codespeak.net> Author: jjlee Date: Sun Oct 1 20:38:36 2006 New Revision: 32797 Modified: wwwsearch/mechanize/trunk/mechanize/_mechanize.py wwwsearch/mechanize/trunk/test.py Log: Add non-working test coverage support Modified: wwwsearch/mechanize/trunk/mechanize/_mechanize.py ============================================================================== --- wwwsearch/mechanize/trunk/mechanize/_mechanize.py (original) +++ wwwsearch/mechanize/trunk/mechanize/_mechanize.py Sun Oct 1 20:38:36 2006 @@ -138,6 +138,14 @@ self._response.close() return self._mech_open(url, data) + def open_no_visit(self): + """Open without visiting the URL. + + Useful for downloading image and CSS files, robots.txt, etc. + + """ + return self._mech_open(url, data) + def _mech_open(self, url, data=None, update_history=True): try: url.get_full_url Modified: wwwsearch/mechanize/trunk/test.py ============================================================================== --- wwwsearch/mechanize/trunk/test.py (original) +++ wwwsearch/mechanize/trunk/test.py Sun Oct 1 20:38:36 2006 @@ -58,7 +58,6 @@ self.testLoader = testLoader self.progName = os.path.basename(argv[0]) self.parseArgs(argv) - self.runTests() def usageExit(self, msg=None): if msg: print msg @@ -99,7 +98,7 @@ if self.testRunner is None: self.testRunner = TextTestRunner(verbosity=self.verbosity) result = self.testRunner.run(self.test) - sys.exit(not result.wasSuccessful()) + return result if __name__ == "__main__": @@ -110,6 +109,11 @@ # XXX temporary stop-gap to run doctests + # XXXX coverage output seems incorrect ATM + run_coverage = '-c' in sys.argv + if run_coverage: + sys.argv.remove("-c") + # import local copy of Python 2.5 doctest assert os.path.isdir("test") sys.path.insert(0, "test") @@ -122,6 +126,12 @@ sys.path.insert(0, "test-tools") import doctest + import coverage + if run_coverage: + print 'running coverage' + coverage.erase() + coverage.start() + import mechanize # 
run .doctest files needing special support @@ -163,4 +173,23 @@ import unittest test_path = os.path.join(os.path.dirname(sys.argv[0]), "test") sys.path.insert(0, test_path) - TestProgram(MODULE_NAMES) + prog = TestProgram(MODULE_NAMES) + result = prog.runTests() + + if run_coverage: + # HTML coverage report + import colorize + from mechanize import _mechanize + try: + os.mkdir("coverage") + except OSError: + pass + f, s, m, mf = coverage.analysis(_mechanize) + fo = open(os.path.join('coverage', os.path.basename(f)+'.html'), 'wb') + colorize.colorize_file(f, outstream=fo, not_covered=mf) + fo.close() + coverage.report(_mechanize) + #print coverage.analysis(_mechanize) + + # XXX exit status is wrong -- does not take account of doctests + sys.exit(not result.wasSuccessful()) From jjlee at codespeak.net Sun Oct 1 20:39:29 2006 From: jjlee at codespeak.net (jjlee at codespeak.net) Date: Sun, 1 Oct 2006 20:39:29 +0200 (CEST) Subject: [wwwsearch-commits] r32798 - in wwwsearch/mechanize/trunk: . mechanize Message-ID: <20061001183929.18F7B1005A@code0.codespeak.net> Author: jjlee Date: Sun Oct 1 20:39:27 2006 New Revision: 32798 Modified: wwwsearch/mechanize/trunk/mechanize/_mechanize.py wwwsearch/mechanize/trunk/test.py Log: Revert accidental commit 32797, yawn Modified: wwwsearch/mechanize/trunk/mechanize/_mechanize.py ============================================================================== --- wwwsearch/mechanize/trunk/mechanize/_mechanize.py (original) +++ wwwsearch/mechanize/trunk/mechanize/_mechanize.py Sun Oct 1 20:39:27 2006 @@ -138,14 +138,6 @@ self._response.close() return self._mech_open(url, data) - def open_no_visit(self): - """Open without visiting the URL. - - Useful for downloading image and CSS files, robots.txt, etc. 
- - """ - return self._mech_open(url, data) - def _mech_open(self, url, data=None, update_history=True): try: url.get_full_url Modified: wwwsearch/mechanize/trunk/test.py ============================================================================== --- wwwsearch/mechanize/trunk/test.py (original) +++ wwwsearch/mechanize/trunk/test.py Sun Oct 1 20:39:27 2006 @@ -58,6 +58,7 @@ self.testLoader = testLoader self.progName = os.path.basename(argv[0]) self.parseArgs(argv) + self.runTests() def usageExit(self, msg=None): if msg: print msg @@ -98,7 +99,7 @@ if self.testRunner is None: self.testRunner = TextTestRunner(verbosity=self.verbosity) result = self.testRunner.run(self.test) - return result + sys.exit(not result.wasSuccessful()) if __name__ == "__main__": @@ -109,11 +110,6 @@ # XXX temporary stop-gap to run doctests - # XXXX coverage output seems incorrect ATM - run_coverage = '-c' in sys.argv - if run_coverage: - sys.argv.remove("-c") - # import local copy of Python 2.5 doctest assert os.path.isdir("test") sys.path.insert(0, "test") @@ -126,12 +122,6 @@ sys.path.insert(0, "test-tools") import doctest - import coverage - if run_coverage: - print 'running coverage' - coverage.erase() - coverage.start() - import mechanize # run .doctest files needing special support @@ -173,23 +163,4 @@ import unittest test_path = os.path.join(os.path.dirname(sys.argv[0]), "test") sys.path.insert(0, test_path) - prog = TestProgram(MODULE_NAMES) - result = prog.runTests() - - if run_coverage: - # HTML coverage report - import colorize - from mechanize import _mechanize - try: - os.mkdir("coverage") - except OSError: - pass - f, s, m, mf = coverage.analysis(_mechanize) - fo = open(os.path.join('coverage', os.path.basename(f)+'.html'), 'wb') - colorize.colorize_file(f, outstream=fo, not_covered=mf) - fo.close() - coverage.report(_mechanize) - #print coverage.analysis(_mechanize) - - # XXX exit status is wrong -- does not take account of doctests - sys.exit(not result.wasSuccessful()) + 
TestProgram(MODULE_NAMES) From jjlee at codespeak.net Sun Oct 1 20:39:52 2006 From: jjlee at codespeak.net (jjlee at codespeak.net) Date: Sun, 1 Oct 2006 20:39:52 +0200 (CEST) Subject: [wwwsearch-commits] r32799 - wwwsearch/mechanize/trunk Message-ID: <20061001183952.780431005A@code0.codespeak.net> Author: jjlee Date: Sun Oct 1 20:39:51 2006 New Revision: 32799 Modified: wwwsearch/mechanize/trunk/test.py Log: Add non-working test coverage support Modified: wwwsearch/mechanize/trunk/test.py ============================================================================== --- wwwsearch/mechanize/trunk/test.py (original) +++ wwwsearch/mechanize/trunk/test.py Sun Oct 1 20:39:51 2006 @@ -58,7 +58,6 @@ self.testLoader = testLoader self.progName = os.path.basename(argv[0]) self.parseArgs(argv) - self.runTests() def usageExit(self, msg=None): if msg: print msg @@ -99,7 +98,7 @@ if self.testRunner is None: self.testRunner = TextTestRunner(verbosity=self.verbosity) result = self.testRunner.run(self.test) - sys.exit(not result.wasSuccessful()) + return result if __name__ == "__main__": @@ -110,6 +109,11 @@ # XXX temporary stop-gap to run doctests + # XXXX coverage output seems incorrect ATM + run_coverage = '-c' in sys.argv + if run_coverage: + sys.argv.remove("-c") + # import local copy of Python 2.5 doctest assert os.path.isdir("test") sys.path.insert(0, "test") @@ -122,6 +126,12 @@ sys.path.insert(0, "test-tools") import doctest + import coverage + if run_coverage: + print 'running coverage' + coverage.erase() + coverage.start() + import mechanize # run .doctest files needing special support @@ -163,4 +173,23 @@ import unittest test_path = os.path.join(os.path.dirname(sys.argv[0]), "test") sys.path.insert(0, test_path) - TestProgram(MODULE_NAMES) + prog = TestProgram(MODULE_NAMES) + result = prog.runTests() + + if run_coverage: + # HTML coverage report + import colorize + from mechanize import _mechanize + try: + os.mkdir("coverage") + except OSError: + pass + f, s, m, mf = 
coverage.analysis(_mechanize) + fo = open(os.path.join('coverage', os.path.basename(f)+'.html'), 'wb') + colorize.colorize_file(f, outstream=fo, not_covered=mf) + fo.close() + coverage.report(_mechanize) + #print coverage.analysis(_mechanize) + + # XXX exit status is wrong -- does not take account of doctests + sys.exit(not result.wasSuccessful()) From jjlee at codespeak.net Sun Oct 1 20:49:05 2006 From: jjlee at codespeak.net (jjlee at codespeak.net) Date: Sun, 1 Oct 2006 20:49:05 +0200 (CEST) Subject: [wwwsearch-commits] r32803 - wwwsearch/mechanize/trunk Message-ID: <20061001184905.0410C1005A@code0.codespeak.net> Author: jjlee Date: Sun Oct 1 20:49:04 2006 New Revision: 32803 Modified: wwwsearch/mechanize/trunk/test.py Log: Comment out HTML coverage report to remove dependency on colorize.py, so can report bug to coverage.py maintainer Modified: wwwsearch/mechanize/trunk/test.py ============================================================================== --- wwwsearch/mechanize/trunk/test.py (original) +++ wwwsearch/mechanize/trunk/test.py Sun Oct 1 20:49:04 2006 @@ -178,16 +178,16 @@ if run_coverage: # HTML coverage report - import colorize +## import colorize from mechanize import _mechanize - try: - os.mkdir("coverage") - except OSError: - pass - f, s, m, mf = coverage.analysis(_mechanize) - fo = open(os.path.join('coverage', os.path.basename(f)+'.html'), 'wb') - colorize.colorize_file(f, outstream=fo, not_covered=mf) - fo.close() +## try: +## os.mkdir("coverage") +## except OSError: +## pass +## f, s, m, mf = coverage.analysis(_mechanize) +## fo = open(os.path.join('coverage', os.path.basename(f)+'.html'), 'wb') +## colorize.colorize_file(f, outstream=fo, not_covered=mf) +## fo.close() coverage.report(_mechanize) #print coverage.analysis(_mechanize) From jjlee at codespeak.net Fri Oct 6 01:03:50 2006 From: jjlee at codespeak.net (jjlee at codespeak.net) Date: Fri, 6 Oct 2006 01:03:50 +0200 (CEST) Subject: [wwwsearch-commits] r32925 - in 
wwwsearch/mechanize/trunk: . test-tools Message-ID: <20061005230350.0C9D110095@code0.codespeak.net> Author: jjlee Date: Fri Oct 6 01:03:48 2006 New Revision: 32925 Modified: wwwsearch/mechanize/trunk/test-tools/doctest.py wwwsearch/mechanize/trunk/test.py Log: Fix biggest coverage measurement problem (thanks to Ned Batchelder). There are still some oddities, though. Modified: wwwsearch/mechanize/trunk/test-tools/doctest.py ============================================================================== --- wwwsearch/mechanize/trunk/test-tools/doctest.py (original) +++ wwwsearch/mechanize/trunk/test-tools/doctest.py Fri Oct 6 01:03:48 2006 @@ -353,8 +353,19 @@ """ def __init__(self, out): self.__out = out + self.__debugger_used = False pdb.Pdb.__init__(self) + def set_trace(self): + self.__debugger_used = True + pdb.Pdb.set_trace(self) + + def set_continue(self): + # Calling set_continue unconditionally would break unit test coverage + # reporting, as Bdb.set_continue calls sys.settrace(None). + if self.__debugger_used: + pdb.Pdb.set_continue(self) + def trace_dispatch(self, *args): # Redirect stdout to the given stream. 
save_stdout = sys.stdout Modified: wwwsearch/mechanize/trunk/test.py ============================================================================== --- wwwsearch/mechanize/trunk/test.py (original) +++ wwwsearch/mechanize/trunk/test.py Fri Oct 6 01:03:48 2006 @@ -179,17 +179,22 @@ if run_coverage: # HTML coverage report ## import colorize - from mechanize import _mechanize ## try: ## os.mkdir("coverage") ## except OSError: ## pass -## f, s, m, mf = coverage.analysis(_mechanize) -## fo = open(os.path.join('coverage', os.path.basename(f)+'.html'), 'wb') -## colorize.colorize_file(f, outstream=fo, not_covered=mf) -## fo.close() - coverage.report(_mechanize) - #print coverage.analysis(_mechanize) + private_modules = glob.glob("mechanize/_*.py") + private_modules.remove("mechanize/__init__.py") + for module_filename in private_modules: + module_name = module_filename.replace("/", ".")[:-3] + print module_name + module = sys.modules[module_name] +## f, s, m, mf = coverage.analysis(module) +## fo = open(os.path.join('coverage', os.path.basename(f)+'.html'), 'wb') +## colorize.colorize_file(f, outstream=fo, not_covered=mf) +## fo.close() + coverage.report(module) + #print coverage.analysis(module) # XXX exit status is wrong -- does not take account of doctests sys.exit(not result.wasSuccessful()) From jjlee at codespeak.net Sat Oct 7 14:46:23 2006 From: jjlee at codespeak.net (jjlee at codespeak.net) Date: Sat, 7 Oct 2006 14:46:23 +0200 (CEST) Subject: [wwwsearch-commits] r32984 - wwwsearch/mechanize/trunk Message-ID: <20061007124623.56EBA10068@code0.codespeak.net> Author: jjlee Date: Sat Oct 7 14:46:21 2006 New Revision: 32984 Modified: wwwsearch/mechanize/trunk/test.py Log: Uncomment HTML statment coverage output code Modified: wwwsearch/mechanize/trunk/test.py ============================================================================== --- wwwsearch/mechanize/trunk/test.py (original) +++ wwwsearch/mechanize/trunk/test.py Sat Oct 7 14:46:21 2006 @@ -178,21 +178,21 @@ if 
run_coverage: # HTML coverage report -## import colorize -## try: -## os.mkdir("coverage") -## except OSError: -## pass + import colorize + try: + os.mkdir("coverage") + except OSError: + pass private_modules = glob.glob("mechanize/_*.py") private_modules.remove("mechanize/__init__.py") for module_filename in private_modules: module_name = module_filename.replace("/", ".")[:-3] print module_name module = sys.modules[module_name] -## f, s, m, mf = coverage.analysis(module) -## fo = open(os.path.join('coverage', os.path.basename(f)+'.html'), 'wb') -## colorize.colorize_file(f, outstream=fo, not_covered=mf) -## fo.close() + f, s, m, mf = coverage.analysis(module) + fo = open(os.path.join('coverage', os.path.basename(f)+'.html'), 'wb') + colorize.colorize_file(f, outstream=fo, not_covered=mf) + fo.close() coverage.report(module) #print coverage.analysis(module) From jjlee at codespeak.net Sat Oct 7 14:49:58 2006 From: jjlee at codespeak.net (jjlee at codespeak.net) Date: Sat, 7 Oct 2006 14:49:58 +0200 (CEST) Subject: [wwwsearch-commits] r32985 - wwwsearch/mechanize/trunk/docs-in-progress Message-ID: <20061007124958.5878E10068@code0.codespeak.net> Author: jjlee Date: Sat Oct 7 14:49:57 2006 New Revision: 32985 Modified: wwwsearch/mechanize/trunk/docs-in-progress/doc2.rst Log: Remove an empty code-block Modified: wwwsearch/mechanize/trunk/docs-in-progress/doc2.rst ============================================================================== --- wwwsearch/mechanize/trunk/docs-in-progress/doc2.rst (original) +++ wwwsearch/mechanize/trunk/docs-in-progress/doc2.rst Sat Oct 7 14:49:57 2006 @@ -812,8 +812,6 @@ FAQs - usage ------------ -.. code-block:: python - - Why don't I have any cookies? Read the `debugging section`_ of this page. 
From jjlee at codespeak.net Sat Oct 7 14:51:39 2006 From: jjlee at codespeak.net (jjlee at codespeak.net) Date: Sat, 7 Oct 2006 14:51:39 +0200 (CEST) Subject: [wwwsearch-commits] r32986 - wwwsearch/mechanize/trunk/mechanize Message-ID: <20061007125139.6717310068@code0.codespeak.net> Author: jjlee Date: Sat Oct 7 14:51:38 2006 New Revision: 32986 Modified: wwwsearch/mechanize/trunk/mechanize/_util.py Log: Remove some unused code Modified: wwwsearch/mechanize/trunk/mechanize/_util.py ============================================================================== --- wwwsearch/mechanize/trunk/mechanize/_util.py (original) +++ wwwsearch/mechanize/trunk/mechanize/_util.py Sat Oct 7 14:51:38 2006 @@ -15,15 +15,6 @@ except: return False else: return True -SPACE_DICT = {} -for c in string.whitespace: - SPACE_DICT[c] = None -del c -def isspace(string): - for c in string: - if not SPACE_DICT.has_key(c): return False - return True - ## def caller(): ## try: ## raise SyntaxError From jjlee at codespeak.net Sat Oct 7 14:53:15 2006 From: jjlee at codespeak.net (jjlee at codespeak.net) Date: Sat, 7 Oct 2006 14:53:15 +0200 (CEST) Subject: [wwwsearch-commits] r32987 - in wwwsearch/mechanize/trunk: mechanize test Message-ID: <20061007125315.EFF6710068@code0.codespeak.net> Author: jjlee Date: Sat Oct 7 14:53:13 2006 New Revision: 32987 Added: wwwsearch/mechanize/trunk/test/test_browser.doctest Modified: wwwsearch/mechanize/trunk/mechanize/_html.py wwwsearch/mechanize/trunk/mechanize/_mechanize.py wwwsearch/mechanize/trunk/mechanize/_seek.py wwwsearch/mechanize/trunk/mechanize/_upgrade.py wwwsearch/mechanize/trunk/test/test_browser.py Log: Fix a bunch of problems with Browser and Factory not handling None responses correctly; Add some tests Modified: wwwsearch/mechanize/trunk/mechanize/_html.py ============================================================================== --- wwwsearch/mechanize/trunk/mechanize/_html.py (original) +++ wwwsearch/mechanize/trunk/mechanize/_html.py Sat 
Oct 7 14:53:13 2006 @@ -480,8 +480,8 @@ def set_response(self, response): """Set response. - The response must implement the same interface as objects returned by - urllib2.urlopen(). + The response must either be None or implement the same interface as + objects returned by urllib2.urlopen(). """ self._response = response Modified: wwwsearch/mechanize/trunk/mechanize/_mechanize.py ============================================================================== --- wwwsearch/mechanize/trunk/mechanize/_mechanize.py (original) +++ wwwsearch/mechanize/trunk/mechanize/_mechanize.py Sat Oct 7 14:53:13 2006 @@ -98,8 +98,6 @@ if history is None: history = History() self._history = history - self.request = self._response = None - self.form = None if request_class is None: if not hasattr(urllib2.Request, "add_unredirected_header"): @@ -113,6 +111,9 @@ self._factory = factory self.request_class = request_class + self.request = None + self.set_response(None) + UserAgent.__init__(self) # do this last to avoid __getattr__ problems def close(self): @@ -208,15 +209,23 @@ return copy.copy(self._response) def set_response(self, response): - """Replace current response with (a copy of) response.""" + """Replace current response with (a copy of) response. + + response may be None. 
+ """ # sanity check, necessary but far from sufficient - if not (hasattr(response, "info") and hasattr(response, "geturl") and - hasattr(response, "read")): + if not (response is None or + (hasattr(response, "info") and hasattr(response, "geturl") and + hasattr(response, "read") + ) + ): raise ValueError("not a response object") self.form = None - self._response = _upgrade.upgrade_response(response) - self._factory.set_response(self._response) + if response is not None: + response = _upgrade.upgrade_response(response) + self._response = response + self._factory.set_response(response) def geturl(self): """Get URL of current document.""" Modified: wwwsearch/mechanize/trunk/mechanize/_seek.py ============================================================================== --- wwwsearch/mechanize/trunk/mechanize/_seek.py (original) +++ wwwsearch/mechanize/trunk/mechanize/_seek.py Sat Oct 7 14:53:13 2006 @@ -6,6 +6,6 @@ """Make responses seekable.""" def any_response(self, request, response): - if not hasattr(response, "seek"): + if response is not None and not hasattr(response, "seek"): return response_seek_wrapper(response) return response Modified: wwwsearch/mechanize/trunk/mechanize/_upgrade.py ============================================================================== --- wwwsearch/mechanize/trunk/mechanize/_upgrade.py (original) +++ wwwsearch/mechanize/trunk/mechanize/_upgrade.py Sat Oct 7 14:53:13 2006 @@ -27,6 +27,7 @@ # upgrade responses to be .close()able without becoming unusable handler_order = 0 # before anything else def any_response(self, request, response): - if not hasattr(response, 'closeable_response'): + if (response is not None and + not hasattr(response, 'closeable_response')): response = upgrade_response(response) return response Added: wwwsearch/mechanize/trunk/test/test_browser.doctest ============================================================================== --- (empty file) +++ wwwsearch/mechanize/trunk/test/test_browser.doctest Sat Oct 
7 14:53:13 2006 @@ -0,0 +1,32 @@ +>>> from test_browser import TestBrowser, TestBrowser2 + +Warn early about some mistakes setting a response object + +>>> import StringIO +>>> br = TestBrowser() +>>> br.set_response("blah") +Traceback (most recent call last): +... +ValueError: not a response object +>>> br.set_response(StringIO.StringIO()) +Traceback (most recent call last): +... +ValueError: not a response object + + +.open() without an appropriate scheme handler should fail with +URLError + +>>> br = TestBrowser2() +>>> br.open("http://example.com") +Traceback (most recent call last): +... +URLError: + +Reload after failed .open() should fail due to failure to open, not +with BrowserStateError + +>>> br.reload() +Traceback (most recent call last): +... +URLError: Modified: wwwsearch/mechanize/trunk/test/test_browser.py ============================================================================== --- wwwsearch/mechanize/trunk/test/test_browser.py (original) +++ wwwsearch/mechanize/trunk/test/test_browser.py Sat Oct 7 14:53:13 2006 @@ -105,6 +105,15 @@ default_others = [] default_schemes = [] +class TestBrowser2(mechanize.Browser): + # XXX better name! + # As TestBrowser, this is neutered so doesn't know about protocol handling, + # but still knows what to do with unknown schemes, etc., because + # UserAgent's default_others list is left intact, including classes like + # UnknownHandler + default_features = ["_seek"] + default_schemes = [] + class BrowserTests(TestCase): @@ -328,17 +337,44 @@ url = "http://example.com/" b = TestBrowser() - b.add_handler(make_mock_handler()([("http_open", MockResponse(url, "", {}))])) + + self.assert_(b.response() is None) + + # To open a relative reference (often called a "relative URL"), you + # have to have already opened a URL for it "to be relative to". 
+ self.assertRaises(mechanize.BrowserStateError, b.open, "relative_ref") + + # we can still clear the history even if we've not visited any URL + b.clear_history() + + # most methods raise BrowserStateError... + def test_state_error(method_names): + for attr in method_names: + method = getattr(b, attr) + #print attr + self.assertRaises(mechanize.BrowserStateError, method) + self.assertRaises(mechanize.BrowserStateError, b.select_form, + name="blah") + self.assertRaises(mechanize.BrowserStateError, b.find_link, + name="blah") + # ...if not visiting a URL... + test_state_error(("geturl reload back viewing_html encoding " + "click links forms title select_form".split())) + self.assertRaises(mechanize.BrowserStateError, b.set_cookie, "foo=bar") + self.assertRaises(mechanize.BrowserStateError, b.submit, nr=0) + self.assertRaises(mechanize.BrowserStateError, b.click_link, nr=0) + self.assertRaises(mechanize.BrowserStateError, b.follow_link, nr=0) + self.assertRaises(mechanize.BrowserStateError, b.find_link, nr=0) + # ...and lots do so if visiting a non-HTML URL + b.add_handler(make_mock_handler()( + [("http_open", MockResponse(url, "", {}))])) r = b.open(url) self.assert_(not b.viewing_html()) - self.assertRaises(mechanize.BrowserStateError, b.links) - self.assertRaises(mechanize.BrowserStateError, b.forms) - self.assertRaises(mechanize.BrowserStateError, b.title) - self.assertRaises(mechanize.BrowserStateError, b.select_form) - self.assertRaises(mechanize.BrowserStateError, b.select_form, - name="blah") - self.assertRaises(mechanize.BrowserStateError, b.find_link, - name="blah") + test_state_error("click links forms title select_form".split()) + self.assertRaises(mechanize.BrowserStateError, b.submit, nr=0) + self.assertRaises(mechanize.BrowserStateError, b.click_link, nr=0) + self.assertRaises(mechanize.BrowserStateError, b.follow_link, nr=0) + self.assertRaises(mechanize.BrowserStateError, b.find_link, nr=0) b = TestBrowser() r = MockResponse(url, From jjlee at 
codespeak.net Sat Oct 7 15:29:23 2006 From: jjlee at codespeak.net (jjlee at codespeak.net) Date: Sat, 7 Oct 2006 15:29:23 +0200 (CEST) Subject: [wwwsearch-commits] r32989 - wwwsearch/mechanize/trunk/mechanize Message-ID: <20061007132923.44F2010068@code0.codespeak.net> Author: jjlee Date: Sat Oct 7 15:29:22 2006 New Revision: 32989 Modified: wwwsearch/mechanize/trunk/mechanize/_mechanize.py Log: Formatting nit Modified: wwwsearch/mechanize/trunk/mechanize/_mechanize.py ============================================================================== --- wwwsearch/mechanize/trunk/mechanize/_mechanize.py (original) +++ wwwsearch/mechanize/trunk/mechanize/_mechanize.py Sat Oct 7 15:29:22 2006 @@ -51,6 +51,7 @@ response.close() del self._history[:] + class Browser(UserAgent): """Browser-like class with support for history, forms and links. From jjlee at codespeak.net Sat Oct 7 15:30:36 2006 From: jjlee at codespeak.net (jjlee at codespeak.net) Date: Sat, 7 Oct 2006 15:30:36 +0200 (CEST) Subject: [wwwsearch-commits] r32990 - in wwwsearch/mechanize/trunk: mechanize test Message-ID: <20061007133036.BC9A510068@code0.codespeak.net> Author: jjlee Date: Sat Oct 7 15:30:35 2006 New Revision: 32990 Modified: wwwsearch/mechanize/trunk/mechanize/_response.py wwwsearch/mechanize/trunk/test/test_browser.doctest wwwsearch/mechanize/trunk/test/test_browser.py Log: Add some more history tests Modified: wwwsearch/mechanize/trunk/mechanize/_response.py ============================================================================== --- wwwsearch/mechanize/trunk/mechanize/_response.py (original) +++ wwwsearch/mechanize/trunk/mechanize/_response.py Sat Oct 7 15:30:35 2006 @@ -349,7 +349,7 @@ state["wrapped"] = new_wrapped return state -def test_response(data, headers, +def test_response(data='test data', headers=[], url="http://example.com/", code=200, msg="OK"): return make_response(data, headers, url, code, msg) Modified: wwwsearch/mechanize/trunk/test/test_browser.doctest 
============================================================================== --- wwwsearch/mechanize/trunk/test/test_browser.doctest (original) +++ wwwsearch/mechanize/trunk/test/test_browser.doctest Sat Oct 7 15:30:35 2006 @@ -1,9 +1,10 @@ ->>> from test_browser import TestBrowser, TestBrowser2 +>>> from mechanize._response import test_response +>>> from test_browser import TestBrowser2, make_mock_handler Warn early about some mistakes setting a response object >>> import StringIO ->>> br = TestBrowser() +>>> br = TestBrowser2() >>> br.set_response("blah") Traceback (most recent call last): ... @@ -30,3 +31,39 @@ Traceback (most recent call last): ... URLError: + + +.clear_history() should do what it says on the tin. Note that the +history does not include the current response! + +>>> br = TestBrowser2() +>>> br.add_handler(make_mock_handler(test_response)([("http_open", None)])) + +>>> br.response() is None +True +>>> len(br._history._history) +0 + +>>> r = br.open("http://example.com/1") +>>> br.response() is not None +True +>>> len(br._history._history) +0 + +>>> br.clear_history() +>>> br.response() is not None +True +>>> len(br._history._history) +0 + +>>> r = br.open("http://example.com/2") +>>> br.response() is not None +True +>>> len(br._history._history) +1 + +>>> br.clear_history() +>>> br.response() is not None +True +>>> len(br._history._history) +0 Modified: wwwsearch/mechanize/trunk/test/test_browser.py ============================================================================== --- wwwsearch/mechanize/trunk/test/test_browser.py (original) +++ wwwsearch/mechanize/trunk/test/test_browser.py Sat Oct 7 15:30:35 2006 @@ -17,7 +17,7 @@ # XXX these 'mock' classes are badly in need of simplification / removal -# (note this stuff is also used by test_useragent.py) +# (note this stuff is also used by test_useragent.py and test_browser.doctest) class MockMethod: def __init__(self, meth_name, action, handle): self.meth_name = meth_name @@ -56,7 +56,7 @@ def 
__setstate__(self, state): self.__dict__ = state -def make_mock_handler(): +def make_mock_handler(response_class=MockResponse): class MockHandler: processor_order = 500 handler_order = -1 @@ -77,7 +77,7 @@ r = response r.seek(0) else: - r = MockResponse() + r = response_class() req = args[0] r.url = req.get_full_url() return r From jjlee at codespeak.net Sun Oct 8 01:34:38 2006 From: jjlee at codespeak.net (jjlee at codespeak.net) Date: Sun, 8 Oct 2006 01:34:38 +0200 (CEST) Subject: [wwwsearch-commits] r32996 - in wwwsearch/mechanize/trunk: . mechanize test Message-ID: <20061007233438.17D6D1006F@code0.codespeak.net> Author: jjlee Date: Sun Oct 8 01:34:33 2006 New Revision: 32996 Modified: wwwsearch/mechanize/trunk/functional_tests.py wwwsearch/mechanize/trunk/mechanize/_http.py wwwsearch/mechanize/trunk/mechanize/_mechanize.py wwwsearch/mechanize/trunk/mechanize/_opener.py wwwsearch/mechanize/trunk/mechanize/_request.py wwwsearch/mechanize/trunk/mechanize/_upgrade.py wwwsearch/mechanize/trunk/test/test_browser.doctest wwwsearch/mechanize/trunk/test/test_urllib2.py Log: Add Browser.open_novisit() and Request.visit -- this fixes an issue where internal open of robots.txt would affect browser state; Also fix test_redirect in functional_tests.py (wasn't actually redirecting) Modified: wwwsearch/mechanize/trunk/functional_tests.py ============================================================================== --- wwwsearch/mechanize/trunk/functional_tests.py (original) +++ wwwsearch/mechanize/trunk/functional_tests.py Sun Oct 8 01:34:33 2006 @@ -60,13 +60,27 @@ self.assertEqual(self.browser.title(), 'Python bits') def test_redirect(self): - # 302 redirect due to missing final '/' - self.browser.open('http://wwwsearch.sourceforge.net') + # 301 redirect due to missing final '/' + r = self.browser.open('http://wwwsearch.sourceforge.net/bits') + self.assertEqual(r.code, 200) + self.assert_("GeneralFAQ.html" in r.read(2048)) def test_file_url(self): url = "file://%s" % 
sanepathname2url( os.path.abspath('functional_tests.py')) - self.browser.open(url) + r = self.browser.open(url) + self.assert_("this string appears in this file ;-)" in r.read()) + + def test_open_novisit(self): + def test_state(br): + self.assert_(br.request is None) + self.assert_(br.response() is None) + self.assertRaises(mechanize.BrowserStateError, br.back) + test_state(self.browser) + # note this involves a redirect, which should itself be non-visiting + r = self.browser.open_novisit("http://wwwsearch.sourceforge.net/bits") + test_state(self.browser) + self.assert_("GeneralFAQ.html" in r.read(2048)) class ResponseTests(TestCase): Modified: wwwsearch/mechanize/trunk/mechanize/_http.py ============================================================================== --- wwwsearch/mechanize/trunk/mechanize/_http.py (original) +++ wwwsearch/mechanize/trunk/mechanize/_http.py Sun Oct 8 01:34:33 2006 @@ -76,10 +76,16 @@ # from the user (of urllib2, in this case). In practice, # essentially all clients do redirect in this case, so we do # the same. 
+ try: + visit = req.visit + except AttributeError: + visit = None return Request(newurl, headers=req.headers, origin_req_host=req.get_origin_req_host(), - unverifiable=True) + unverifiable=True, + visit=visit, + ) else: raise HTTPError(req.get_full_url(), code, msg, headers, fp) @@ -348,8 +354,9 @@ """Reads the robots.txt URL and feeds it to the parser.""" if self._opener is None: self.set_opener() + req = Request(self.url, unverifiable=True, visit=False) try: - f = self._opener.open(self.url) + f = self._opener.open(req) except HTTPError, f: pass except (IOError, socket.error, OSError), exc: Modified: wwwsearch/mechanize/trunk/mechanize/_mechanize.py ============================================================================== --- wwwsearch/mechanize/trunk/mechanize/_mechanize.py (original) +++ wwwsearch/mechanize/trunk/mechanize/_mechanize.py Sun Oct 8 01:34:33 2006 @@ -135,12 +135,25 @@ self.select_form = self.click = self.submit = self.click_link = None self.follow_link = self.find_link = None + def open_novisit(self, url, data=None): + """Open a URL without visiting it. + + The browser state (including .request, .response(), history, forms and + links) are all left unchanged by calling this function. + + The interface is the same as for .open(). + + This is useful for things like fetching images. + + See also .retrieve(). 
+ + """ + return self._mech_open(url, data, visit=False) + def open(self, url, data=None): - if self._response is not None: - self._response.close() return self._mech_open(url, data) - def _mech_open(self, url, data=None, update_history=True): + def _mech_open(self, url, data=None, update_history=True, visit=None): try: url.get_full_url except AttributeError: @@ -154,16 +167,23 @@ "can't fetch relative URL: not viewing any document") url = urlparse.urljoin(self._response.geturl(), url) - if self.request is not None and update_history: - self._history.add(self.request, self._response) - self._response = None - # we want self.request to be assigned even if UserAgent.open fails - self.request = self._request(url, data) - self._previous_scheme = self.request.get_type() + request = self._request(url, data, visit) + visit = request.visit + if visit is None: + visit = True + + if visit: + if self._response is not None: + self._response.close() + if self.request is not None and update_history: + self._history.add(self.request, self._response) + self._response = None + # we want self.request to be assigned even if UserAgent.open fails + self.request = request success = True try: - response = UserAgent.open(self, self.request, data) + response = UserAgent.open(self, request, data) except urllib2.HTTPError, error: success = False if error.fp is None: # not a response @@ -180,9 +200,12 @@ ## # Python core, a fix would need some backwards-compat. hack to be ## # acceptable. 
## raise - self.set_response(response) - response = copy.copy(self._response) + if visit: + self.set_response(response) + response = copy.copy(self._response) + elif response is not None: + response = _upgrade.upgrade_response(response) if not success: raise response Modified: wwwsearch/mechanize/trunk/mechanize/_opener.py ============================================================================== --- wwwsearch/mechanize/trunk/mechanize/_opener.py (original) +++ wwwsearch/mechanize/trunk/mechanize/_opener.py Sun Oct 8 01:34:33 2006 @@ -136,18 +136,25 @@ self._any_request = any_request self._any_response = any_response - def _request(self, url_or_req, data): + def _request(self, url_or_req, data, visit): if isstringlike(url_or_req): - req = Request(url_or_req, data) + req = Request(url_or_req, data, visit=visit) else: # already a urllib2.Request or mechanize.Request instance req = url_or_req if data is not None: req.add_data(data) + # XXX yuck, give request a .visit attribute if it doesn't have one + try: + req.visit + except AttributeError: + req.visit = None + if visit is not None: + req.visit = visit return req def open(self, fullurl, data=None): - req = self._request(fullurl, data) + req = self._request(fullurl, data, None) req_scheme = req.get_type() self._maybe_reindex_handlers() @@ -222,7 +229,7 @@ headers) that would have been returned. 
""" - req = self._request(fullurl, data) + req = self._request(fullurl, data, False) scheme = req.get_type() fp = self.open(req) headers = fp.info() Modified: wwwsearch/mechanize/trunk/mechanize/_request.py ============================================================================== --- wwwsearch/mechanize/trunk/mechanize/_request.py (original) +++ wwwsearch/mechanize/trunk/mechanize/_request.py Sun Oct 8 01:34:33 2006 @@ -16,10 +16,11 @@ class Request(urllib2.Request): def __init__(self, url, data=None, headers={}, - origin_req_host=None, unverifiable=False): + origin_req_host=None, unverifiable=False, visit=None): urllib2.Request.__init__(self, url, data, headers) self.selector = None self.unredirected_hdrs = {} + self.visit = visit # All the terminology below comes from RFC 2965. self.unverifiable = unverifiable Modified: wwwsearch/mechanize/trunk/mechanize/_upgrade.py ============================================================================== --- wwwsearch/mechanize/trunk/mechanize/_upgrade.py (original) +++ wwwsearch/mechanize/trunk/mechanize/_upgrade.py Sun Oct 8 01:34:33 2006 @@ -17,6 +17,8 @@ except AttributeError: pass try: newrequest.unverifiable = request.unverifiable except AttributeError: pass + try: newrequest.visit = request.visit + except AttributeError: pass request = newrequest return request Modified: wwwsearch/mechanize/trunk/test/test_browser.doctest ============================================================================== --- wwwsearch/mechanize/trunk/test/test_browser.doctest (original) +++ wwwsearch/mechanize/trunk/test/test_browser.doctest Sun Oct 8 01:34:33 2006 @@ -67,3 +67,34 @@ True >>> len(br._history._history) 0 + + +.open()ing a Request with False .visit does not affect Browser state. +Redirections during such a non-visiting request should also be +non-visiting. 
+ +>>> from mechanize import BrowserStateError, Request, HTTPRedirectHandler +>>> from test_urllib2 import MockHTTPHandler + +>>> req = Request("http://example.com") +>>> req.visit = False +>>> br = TestBrowser2() +>>> hh = MockHTTPHandler(301, "Location: http://example.com/\r\n\r\n") +>>> br.add_handler(hh) +>>> br.add_handler(HTTPRedirectHandler()) +>>> def raises(exc_class, fn, *args, **kwds): +... try: +... fn(*args, **kwds) +... except exc_class, exc: +... return True +... return False +>>> def test_state(br): +... return (br.request is None and +... br.response() is None and +... raises(BrowserStateError, br.back) +... ) +>>> test_state(br) +True +>>> r = br.open(req) +>>> test_state(br) +True Modified: wwwsearch/mechanize/trunk/test/test_urllib2.py ============================================================================== --- wwwsearch/mechanize/trunk/test/test_urllib2.py (original) +++ wwwsearch/mechanize/trunk/test/test_urllib2.py Sun Oct 8 01:34:33 2006 @@ -22,6 +22,7 @@ import mechanize from mechanize._http import AbstractHTTPHandler, parse_head +from mechanize._response import test_response from mechanize import HTTPRedirectHandler, HTTPRequestUpgradeProcessor, \ HTTPEquivProcessor, HTTPRefreshProcessor, SeekableProcessor, \ HTTPCookieProcessor, HTTPRefererProcessor, \ @@ -1092,11 +1093,10 @@ self._count = self._count + 1 msg = mimetools.Message(StringIO(self.headers)) return self.parent.error( - "http", req, MockFile(), self.code, "Blah", msg) + "http", req, test_response(), self.code, "Blah", msg) else: self.req = req - msg = mimetools.Message(StringIO("\r\n\r\n")) - return MockResponse(200, "OK", msg, "", req.get_full_url()) + return test_response("", [], req.get_full_url()) class MyHTTPHandler(HTTPHandler): pass From jjlee at codespeak.net Mon Oct 9 00:36:56 2006 From: jjlee at codespeak.net (jjlee at codespeak.net) Date: Mon, 9 Oct 2006 00:36:56 +0200 (CEST) Subject: [wwwsearch-commits] r33024 - wwwsearch/ClientForm/trunk Message-ID: 
<20061008223656.9B1BF10076@code0.codespeak.net> Author: jjlee Date: Mon Oct 9 00:36:53 2006 New Revision: 33024 Modified: wwwsearch/ClientForm/trunk/ClientForm.py Log: Allow mechanize to supply URL join / parse / unparse functions, to allow mechanize follow RFC 3986, thus fixing some URL processing bugs. ClientForm should do the same, probably I should merge the two projects after final mechanize release. Modified: wwwsearch/ClientForm/trunk/ClientForm.py ============================================================================== --- wwwsearch/ClientForm/trunk/ClientForm.py (original) +++ wwwsearch/ClientForm/trunk/ClientForm.py Mon Oct 9 00:36:53 2006 @@ -104,7 +104,6 @@ import sys, urllib, urllib2, types, mimetools, copy, urlparse, \ htmlentitydefs, re, random -from urlparse import urljoin from cStringIO import StringIO try: @@ -850,6 +849,11 @@ entitydefs=None, backwards_compat=True, encoding=DEFAULT_ENCODING, + + # private + _urljoin=urlparse.urljoin, + _urlparse=urlparse.urlparse, + _urlunparse=urlparse.urlunparse, ): """Parse HTTP response and return a list of HTMLForm instances. @@ -917,6 +921,9 @@ entitydefs, backwards_compat, encoding, + _urljoin=_urljoin, + _urlparse=_urlparse, + _urlunparse=_urlunparse, ) def ParseFile(file, base_uri, select_default=False, @@ -926,6 +933,13 @@ entitydefs=None, backwards_compat=True, encoding=DEFAULT_ENCODING, + + # these private arguments ars here as a hack to allow mechanize + # to follow RFC 3986. ClientForm should do the same really -- + # perhaps it's time to merge ClientForm with mechanize... + _urljoin=urlparse.urljoin, + _urlparse=urlparse.urlparse, + _urlunparse=urlparse.urlunparse, ): """Parse HTML and return a list of HTMLForm instances. 
@@ -970,7 +984,7 @@ if action is None: action = base_uri else: - action = urljoin(base_uri, action) + action = _urljoin(base_uri, action) action = fp.unescape_attr_if_required(action) name = fp.unescape_attr_if_required(name) attrs = fp.unescape_attrs_if_required(attrs) @@ -978,6 +992,8 @@ form = HTMLForm( action, method, enctype, name, attrs, request_class, forms, labels, id_to_labels, backwards_compat) + form._urlparse = _urlparse + form._urlunparse = _urlunparse for ii in range(len(controls)): type, name, attrs = controls[ii] attrs = fp.unescape_attrs_if_required(attrs) @@ -1171,6 +1187,9 @@ self._clicked = False + self._urlparse = urlparse.urlparse + self._urlunparse = urlparse.urlunparse + def __getattr__(self, name): if name == "value": return self.__dict__["_value"] @@ -1379,10 +1398,10 @@ # This doesn't seem to be specified in HTML 4.01 spec. (ISINDEX is # deprecated in 4.01, but it should still say how to submit it). # Submission of ISINDEX is explained in the HTML 3.2 spec, though. - parts = urlparse.urlparse(form.action) + parts = self._urlparse(form.action) rest, (query, frag) = parts[:-2], parts[-2:] parts = rest + (urllib.quote_plus(self.value), "") - url = urlparse.urlunparse(parts) + url = self._urlunparse(parts) req_data = url, None, [] if return_type == "pairs": @@ -2616,6 +2635,9 @@ self.backwards_compat = backwards_compat # note __setattr__ + self._urlunparse = urlparse.urlunparse + self._urlparse = urlparse.urlparse + def __getattr__(self, name): if name == "backwards_compat": return self._backwards_compat @@ -2674,6 +2696,8 @@ else: control = klass(type, name, a, index) control.add_to_form(self) + control._urlparse = self._urlparse + control._urlunparse = self._urlunparse def fixup(self): """Normalise form after all controls have been added. 
@@ -3162,7 +3186,7 @@ """Return a tuple (url, data, headers).""" method = self.method.upper() #scheme, netloc, path, parameters, query, frag = urlparse.urlparse(self.action) - parts = urlparse.urlparse(self.action) + parts = self._urlparse(self.action) rest, (query, frag) = parts[:-2], parts[-2:] if method == "GET": @@ -3170,11 +3194,11 @@ raise ValueError( "unknown GET form encoding type '%s'" % self.enctype) parts = rest + (urlencode(self._pairs()), "") - uri = urlparse.urlunparse(parts) + uri = self._urlunparse(parts) return uri, None, [] elif method == "POST": parts = rest + (query, "") - uri = urlparse.urlunparse(parts) + uri = self._urlunparse(parts) if self.enctype == "application/x-www-form-urlencoded": return (uri, urlencode(self._pairs()), [("Content-type", self.enctype)]) From jjlee at codespeak.net Mon Oct 9 02:01:00 2006 From: jjlee at codespeak.net (jjlee at codespeak.net) Date: Mon, 9 Oct 2006 02:01:00 +0200 (CEST) Subject: [wwwsearch-commits] r33025 - wwwsearch/ClientForm/trunk Message-ID: <20061009000100.49E8210076@code0.codespeak.net> Author: jjlee Date: Mon Oct 9 02:00:58 2006 New Revision: 33025 Modified: wwwsearch/ClientForm/trunk/README.html.in Log: Fix typo about backwards_compat flag Modified: wwwsearch/ClientForm/trunk/README.html.in ============================================================================== --- wwwsearch/ClientForm/trunk/README.html.in (original) +++ wwwsearch/ClientForm/trunk/README.html.in Mon Oct 9 02:00:58 2006 @@ -152,7 +152,7 @@ deselected: AttributeError is raised in 0.2, whereas deselection was allowed in 0.1. The bug in 0.1 and in 0.2's backwards-compatibility mode will not be fixed, to preserve compatibility and to encourage people to upgrade to the new -0.2 backwards_compat=True behaviour. +0.2 backwards_compat=False behaviour.

Credits

From jjlee at codespeak.net Mon Oct 9 02:01:39 2006 From: jjlee at codespeak.net (jjlee at codespeak.net) Date: Mon, 9 Oct 2006 02:01:39 +0200 (CEST) Subject: [wwwsearch-commits] r33026 - wwwsearch/ClientForm/trunk Message-ID: <20061009000139.B791610076@code0.codespeak.net> Author: jjlee Date: Mon Oct 9 02:01:38 2006 New Revision: 33026 Modified: wwwsearch/ClientForm/trunk/ClientForm.py wwwsearch/ClientForm/trunk/test.py Log: Nameless list controls should never be successful Modified: wwwsearch/ClientForm/trunk/ClientForm.py ============================================================================== --- wwwsearch/ClientForm/trunk/ClientForm.py (original) +++ wwwsearch/ClientForm/trunk/ClientForm.py Mon Oct 9 02:01:38 2006 @@ -1924,6 +1924,8 @@ def __getattr__(self, name): if name == "value": compat = self._form.backwards_compat + if self.name is None: + return [] return [o.name for o in self.items if o.selected and (not o.disabled or compat)] else: @@ -2093,7 +2095,7 @@ return [o.name for o in self.items] def _totally_ordered_pairs(self): - if self.disabled: + if self.disabled or self.name is None: return [] else: return [(o._index, self.name, o.name) for o in self.items Modified: wwwsearch/ClientForm/trunk/test.py ============================================================================== --- wwwsearch/ClientForm/trunk/test.py (original) +++ wwwsearch/ClientForm/trunk/test.py Mon Oct 9 02:01:38 2006 @@ -2999,20 +2999,20 @@
- +
""", """\
- +
""", """\
- +
""", ]: @@ -3023,6 +3023,7 @@ # should have value "on", but not be successful self.assertEqual([item.name for item in bar.items], ["on"]) self.assertEqual(bar.value, []) + self.assertEqual(form.click_pairs(), []) class ContentTypeTests(TestCase): From jjlee at codespeak.net Tue Oct 10 02:05:53 2006 From: jjlee at codespeak.net (jjlee at codespeak.net) Date: Tue, 10 Oct 2006 02:05:53 +0200 (CEST) Subject: [wwwsearch-commits] r33079 - wwwsearch/mechanize/trunk/mechanize Message-ID: <20061010000553.19970100D2@code0.codespeak.net> Author: jjlee Date: Tue Oct 10 02:05:44 2006 New Revision: 33079 Modified: wwwsearch/mechanize/trunk/mechanize/_clientcookie.py wwwsearch/mechanize/trunk/mechanize/_headersutil.py wwwsearch/mechanize/trunk/mechanize/_html.py wwwsearch/mechanize/trunk/mechanize/_http.py wwwsearch/mechanize/trunk/mechanize/_mechanize.py wwwsearch/mechanize/trunk/mechanize/_opener.py Log: Follow RFC 3986 for URL parsing, unparsing and joining -- stop using module urlparse, start using _rfc3986 (but not in _auth.py yet, since I'm probably abusing urlparse in there, so that requires more thought) Modified: wwwsearch/mechanize/trunk/mechanize/_clientcookie.py ============================================================================== --- wwwsearch/mechanize/trunk/mechanize/_clientcookie.py (original) +++ wwwsearch/mechanize/trunk/mechanize/_clientcookie.py Tue Oct 10 02:05:44 2006 @@ -32,7 +32,7 @@ """ -import sys, re, urlparse, copy, time, struct, urllib, types, logging +import sys, re, copy, time, struct, urllib, types, logging try: import threading _threading = threading; del threading @@ -47,6 +47,7 @@ from _headersutil import split_header_words, parse_ns_headers from _util import isstringlike +import _rfc3986 debug = logging.getLogger("mechanize.cookies").debug @@ -156,8 +157,8 @@ """ url = request.get_full_url() - host = urlparse.urlparse(url)[1] - if host == "": + host = _rfc3986.urlsplit(url)[1] + if host is None: host = request.get_header("Host", "") # 
remove port, if present @@ -178,15 +179,10 @@ def request_path(request): """request-URI, as defined by RFC 2965.""" url = request.get_full_url() - #scheme, netloc, path, parameters, query, frag = urlparse.urlparse(url) - #req_path = escape_path(string.join(urlparse.urlparse(url)[2:], "")) - path, parameters, query, frag = urlparse.urlparse(url)[2:] - if parameters: - path = "%s;%s" % (path, parameters) + path, query, frag = _rfc3986.urlsplit(url)[2:] path = escape_path(path) - req_path = urlparse.urlunparse(("", "", path, "", query, frag)) + req_path = _rfc3986.urlunsplit((None, None, path, query, frag)) if not req_path.startswith("/"): - # fix bad RFC 2396 absoluteURI req_path = "/"+req_path return req_path Modified: wwwsearch/mechanize/trunk/mechanize/_headersutil.py ============================================================================== --- wwwsearch/mechanize/trunk/mechanize/_headersutil.py (original) +++ wwwsearch/mechanize/trunk/mechanize/_headersutil.py Tue Oct 10 02:05:44 2006 @@ -9,12 +9,13 @@ """ -import os, re, urlparse +import os, re from types import StringType from types import UnicodeType STRING_TYPES = StringType, UnicodeType from _util import http2time +import _rfc3986 def is_html(ct_headers, url, allow_xhtml=False): """ @@ -24,7 +25,7 @@ """ if not ct_headers: # guess - ext = os.path.splitext(urlparse.urlparse(url)[2])[1] + ext = os.path.splitext(_rfc3986.urlsplit(url)[2])[1] html_exts = [".htm", ".html"] if allow_xhtml: html_exts += [".xhtml"] Modified: wwwsearch/mechanize/trunk/mechanize/_html.py ============================================================================== --- wwwsearch/mechanize/trunk/mechanize/_html.py (original) +++ wwwsearch/mechanize/trunk/mechanize/_html.py Tue Oct 10 02:05:44 2006 @@ -9,17 +9,10 @@ """ import re, copy, urllib, htmlentitydefs -from urlparse import urljoin import _request from _headersutil import split_header_words, is_html as _is_html - -## # XXXX miserable hack -## def urljoin(base, url): -## if 
url.startswith("?"): -## return base+url -## else: -## return urlparse.urljoin(base, url) +import _rfc3986 ## def chr_range(a, b): ## return "".join(map(chr, range(ord(a), ord(b)+1))) @@ -99,7 +92,7 @@ def __init__(self, base_url, url, text, tag, attrs): assert None not in [url, tag, attrs] self.base_url = base_url - self.absolute_url = urljoin(base_url, url) + self.absolute_url = _rfc3986.urljoin(base_url, url) self.url, self.text, self.tag, self.attrs = url, text, tag, attrs def __cmp__(self, other): try: @@ -233,6 +226,9 @@ request_class=self.request_class, backwards_compat=self.backwards_compat, encoding=encoding, + _urljoin=_rfc3986.urljoin, + _urlparse=_rfc3986.urlsplit, + _urlunparse=_rfc3986.urlunsplit, ) class TitleFactory: Modified: wwwsearch/mechanize/trunk/mechanize/_http.py ============================================================================== --- wwwsearch/mechanize/trunk/mechanize/_http.py (original) +++ wwwsearch/mechanize/trunk/mechanize/_http.py Tue Oct 10 02:05:44 2006 @@ -12,7 +12,7 @@ """ -import copy, time, tempfile, htmlentitydefs, re, logging, socket, urlparse, \ +import copy, time, tempfile, htmlentitydefs, re, logging, socket, \ urllib2, urllib, httplib, sgmllib from urllib2 import URLError, HTTPError, BaseHandler from cStringIO import StringIO @@ -23,6 +23,7 @@ from _html import unescape, unescape_charref from _headersutil import is_html from _clientcookie import CookieJar, request_host +import _rfc3986 debug = logging.getLogger("mechanize.cookies").debug @@ -98,7 +99,7 @@ newurl = headers.getheaders('uri')[0] else: return - newurl = urlparse.urljoin(req.get_full_url(), newurl) + newurl = _rfc3986.urljoin(req.get_full_url(), newurl) # XXX Probably want to forget about the state of the current # request, although that might interact poorly with other Modified: wwwsearch/mechanize/trunk/mechanize/_mechanize.py ============================================================================== --- 
wwwsearch/mechanize/trunk/mechanize/_mechanize.py (original) +++ wwwsearch/mechanize/trunk/mechanize/_mechanize.py Tue Oct 10 02:05:44 2006 @@ -9,13 +9,14 @@ """ -import urllib2, urlparse, sys, copy, re +import urllib2, sys, copy, re from _useragent import UserAgent from _html import DefaultFactory from _response import response_seek_wrapper, closeable_response import _upgrade import _request +import _rfc3986 __version__ = (0, 1, 3, None, None) # 0.1.3 @@ -158,14 +159,13 @@ url.get_full_url except AttributeError: # string URL -- convert to absolute URL if required - scheme, netloc = urlparse.urlparse(url)[:2] - if not scheme: + scheme, authority = _rfc3986.urlsplit(url)[:2] + if scheme is None: # relative URL - assert not netloc, "malformed URL" if self._response is None: raise BrowserStateError( - "can't fetch relative URL: not viewing any document") - url = urlparse.urljoin(self._response.geturl(), url) + "can't fetch relative reference: not viewing any document") + url = _rfc3986.urljoin(self._response.geturl(), url) request = self._request(url, data, visit) visit = request.visit @@ -432,9 +432,9 @@ original_scheme in ["http", "https"] and not (original_scheme == "https" and scheme != "https")): # strip URL fragment (RFC 2616 14.36) - parts = urlparse.urlparse(self.request.get_full_url()) - parts = parts[:-1]+("",) - referer = urlparse.urlunparse(parts) + parts = _rfc3986.urlsplit(self.request.get_full_url()) + parts = parts[:-1]+(None,) + referer = _rfc3986.urlunsplit(parts) request.add_unredirected_header("Referer", referer) return request Modified: wwwsearch/mechanize/trunk/mechanize/_opener.py ============================================================================== --- wwwsearch/mechanize/trunk/mechanize/_opener.py (original) +++ wwwsearch/mechanize/trunk/mechanize/_opener.py Tue Oct 10 02:05:44 2006 @@ -9,7 +9,7 @@ """ -import os, urllib2, bisect, urllib, urlparse, httplib, types, tempfile +import os, urllib2, bisect, urllib, httplib, types, tempfile 
try: import threading as _threading except ImportError: @@ -22,6 +22,7 @@ import _http import _upgrade +import _rfc3986 from _util import isstringlike from _request import Request @@ -241,7 +242,7 @@ if filename: tfp = open(filename, 'wb') else: - path = urlparse.urlparse(fullurl)[2] + path = _rfc3986.urlsplit(fullurl)[2] suffix = os.path.splitext(path)[1] fd, filename = tempfile.mkstemp(suffix) self._tempfiles.append(filename) From jjlee at codespeak.net Wed Oct 11 22:59:11 2006 From: jjlee at codespeak.net (jjlee at codespeak.net) Date: Wed, 11 Oct 2006 22:59:11 +0200 (CEST) Subject: [wwwsearch-commits] r33194 - wwwsearch/ClientForm/trunk Message-ID: <20061011205911.22EAE101AE@code0.codespeak.net> Author: jjlee Date: Wed Oct 11 22:59:07 2006 New Revision: 33194 Modified: wwwsearch/ClientForm/trunk/ClientForm.py Log: Fix entity reference / character reference handling for Python 2.5 Modified: wwwsearch/ClientForm/trunk/ClientForm.py ============================================================================== --- wwwsearch/ClientForm/trunk/ClientForm.py (original) +++ wwwsearch/ClientForm/trunk/ClientForm.py Wed Oct 11 22:59:07 2006 @@ -795,14 +795,30 @@ import sgmllib # monkeypatch to fix http://www.python.org/sf/803422 :-( sgmllib.charref = re.compile("&#(x?[0-9a-fA-F]+)[^0-9a-fA-F]") + class _AbstractSgmllibParser(_AbstractFormParser): + def do_option(self, attrs): _AbstractFormParser._start_option(self, attrs) - def unescape_attr_if_required(self, name): - return self.unescape_attr(name) - def unescape_attrs_if_required(self, attrs): - return self.unescape_attrs(attrs) + if sys.version_info[:2] >= (2,5): + # we override this attr to decode hex charrefs + entity_or_charref = re.compile( + '&(?:([a-zA-Z][-.a-zA-Z0-9]*)|#(x?[0-9a-fA-F]+))(;?)') + def convert_entityref(self, name): + return unescape("&%s;" % name, self._entitydefs, self._encoding) + def convert_charref(self, name): + return unescape_charref("%s" % name, self._encoding) + def 
unescape_attr_if_required(self, name): + return name # sgmllib already did it + def unescape_attrs_if_required(self, attrs): + return attrs # ditto + else: + def unescape_attr_if_required(self, name): + return self.unescape_attr(name) + def unescape_attrs_if_required(self, attrs): + return self.unescape_attrs(attrs) + class FormParser(_AbstractSgmllibParser, sgmllib.SGMLParser): """Good for tolerance of incorrect HTML, bad for XHTML.""" From jjlee at codespeak.net Thu Oct 12 00:23:36 2006 From: jjlee at codespeak.net (jjlee at codespeak.net) Date: Thu, 12 Oct 2006 00:23:36 +0200 (CEST) Subject: [wwwsearch-commits] r33200 - wwwsearch/ClientForm/trunk Message-ID: <20061011222336.59E6F101B2@code0.codespeak.net> Author: jjlee Date: Thu Oct 12 00:23:34 2006 New Revision: 33200 Modified: wwwsearch/ClientForm/trunk/ClientForm.py wwwsearch/ClientForm/trunk/test.py Log: Handle line endings in element content the same way browsers do; Convert TEXTAREA content to DOS line ending convention, again following the major browsers (possibly they also do this line ending normalization in some other case(s), I haven't checked) Modified: wwwsearch/ClientForm/trunk/ClientForm.py ============================================================================== --- wwwsearch/ClientForm/trunk/ClientForm.py (original) +++ wwwsearch/ClientForm/trunk/ClientForm.py Thu Oct 12 00:23:34 2006 @@ -126,6 +126,10 @@ _compress_re = re.compile(r"\s+") def compress_text(text): return _compress_re.sub(" ", text.strip()) +def normalize_line_endings(text): + return re.sub(r"(?:(?=)>") @@ -621,6 +621,17 @@ single_control = form.find_control(type="select", nr=1) self.assert_(single_control.value == ["1"]) + def test_close_base_tag(self): + # Benji York: a single newline immediately after a start tag is + # stripped by browsers, but not one immediately before an end tag. + # TEXTAREA content is converted to the DOS newline convention. + forms = ClientForm.ParseFile( + StringIO("
"), + "http://example.com/", + ) + ctl = forms[0].find_control(type="textarea") + self.assertEqual(ctl.value, "\r\nblah\r\n") + class DisabledTests(TestCase): def testOptgroup(self): @@ -3054,6 +3065,32 @@ self.assertEqual(req.ah, not auh) +class FunctionTests(TestCase): + + def test_normalize_line_endings(self): + def check(text, expected): + got = ClientForm.normalize_line_endings(text) + self.assertEqual(got, expected) + + # unix + check("foo\nbar", "foo\r\nbar") + check("foo\nbar\n", "foo\r\nbar\r\n") + # mac + check("foo\rbar", "foo\r\nbar") + check("foo\rbar\r", "foo\r\nbar\r\n") + # dos + check("foo\r\nbar", "foo\r\nbar") + check("foo\r\nbar\r\n", "foo\r\nbar\r\n") + + # inconsistent -- we just blithely convert anything that looks like a + # line ending to the DOS convention, following Firefox's behaviour when + # normalizing textarea content + check("foo\r\nbar\nbaz\rblah\r\n", "foo\r\nbar\r\nbaz\r\nblah\r\n") + + # pathological ;-O + check("\r\n\n\r\r\r\n", "\r\n"*5) + + def startswith(string, initial): if len(initial) > len(string): return False return string[:len(initial)] == initial From jjlee at codespeak.net Thu Oct 12 00:40:28 2006 From: jjlee at codespeak.net (jjlee at codespeak.net) Date: Thu, 12 Oct 2006 00:40:28 +0200 (CEST) Subject: [wwwsearch-commits] r33201 - wwwsearch/ClientForm/trunk Message-ID: <20061011224028.24FB3101B7@code0.codespeak.net> Author: jjlee Date: Thu Oct 12 00:40:26 2006 New Revision: 33201 Modified: wwwsearch/ClientForm/trunk/ClientForm.py Log: Update to-do list Modified: wwwsearch/ClientForm/trunk/ClientForm.py ============================================================================== --- wwwsearch/ClientForm/trunk/ClientForm.py (original) +++ wwwsearch/ClientForm/trunk/ClientForm.py Thu Oct 12 00:40:26 2006 @@ -27,11 +27,8 @@ """ # XXX -# Remove unescape_attr method # Remove parser testing hack # safeUrl()-ize action -# Really should merge CC, CF, pp and mechanize as soon as mechanize -# goes to beta... 
# Add url attribute to ParseError # Switch to unicode throughout (would be 0.3.x) # See Wichert Akkerman's 2004-01-22 message to c.l.py. From jjlee at codespeak.net Thu Oct 12 00:47:19 2006 From: jjlee at codespeak.net (jjlee at codespeak.net) Date: Thu, 12 Oct 2006 00:47:19 +0200 (CEST) Subject: [wwwsearch-commits] r33202 - wwwsearch/mechanize/trunk Message-ID: <20061011224719.AF8C6101C0@code0.codespeak.net> Author: jjlee Date: Thu Oct 12 00:47:18 2006 New Revision: 33202 Modified: wwwsearch/mechanize/trunk/README.html.in Log: Update to-do list Modified: wwwsearch/mechanize/trunk/README.html.in ============================================================================== --- wwwsearch/mechanize/trunk/README.html.in (original) +++ wwwsearch/mechanize/trunk/README.html.in Thu Oct 12 00:47:18 2006 @@ -286,11 +286,11 @@
  • Topological sort for handlers, instead of .handler_order attribute. Add new build_opener and deprecate the old one? -
  • Use RFC 3986 URL absolutization.
  • Test .any_response() two handlers case: ordering.
  • Test referer bugs (frags and don't add in redirect unless orig req had Referer)
  • Proper XHTML support! +
  • Bundle BeautifulSoup 2, until version 3 is supported.
  • Fix BeautifulSoup support to use a single BeautifulSoup instance per page.
  • Test BeautifulSoup support better / fix encoding issue. From jjlee at codespeak.net Thu Oct 12 00:51:34 2006 From: jjlee at codespeak.net (jjlee at codespeak.net) Date: Thu, 12 Oct 2006 00:51:34 +0200 (CEST) Subject: [wwwsearch-commits] r33203 - in wwwsearch/mechanize/trunk: . docs-in-progress mechanize Message-ID: <20061011225134.C5C02101B7@code0.codespeak.net> Author: jjlee Date: Thu Oct 12 00:51:33 2006 New Revision: 33203 Modified: wwwsearch/mechanize/trunk/README.html.in wwwsearch/mechanize/trunk/docs-in-progress/doc.rst wwwsearch/mechanize/trunk/mechanize/_mechanize.py wwwsearch/mechanize/trunk/setup.py Log: Increment version, and label as beta again since it needs another beta release Modified: wwwsearch/mechanize/trunk/README.html.in ============================================================================== --- wwwsearch/mechanize/trunk/README.html.in (original) +++ wwwsearch/mechanize/trunk/README.html.in Thu Oct 12 00:51:33 2006 @@ -444,7 +444,7 @@

    Development release.

      -@{version = "0.1.3"} +@{version = "0.1.4b"}
    • mechanize-@(version).tar.gz
    • mechanize-@(version).zip
    • Change Log (included in distribution) Modified: wwwsearch/mechanize/trunk/docs-in-progress/doc.rst ============================================================================== --- wwwsearch/mechanize/trunk/docs-in-progress/doc.rst (original) +++ wwwsearch/mechanize/trunk/docs-in-progress/doc.rst Thu Oct 12 00:51:33 2006 @@ -473,7 +473,7 @@ Development release ~~~~~~~~~~~~~~~~~~~ -@{version = "0.1.3"} +@{version = "0.1.4b"} - mechanize-@(version).tar.gz - mechanize-@(version).zip - Change Log (included in distribution) Modified: wwwsearch/mechanize/trunk/mechanize/_mechanize.py ============================================================================== --- wwwsearch/mechanize/trunk/mechanize/_mechanize.py (original) +++ wwwsearch/mechanize/trunk/mechanize/_mechanize.py Thu Oct 12 00:51:33 2006 @@ -18,7 +18,7 @@ import _request import _rfc3986 -__version__ = (0, 1, 3, None, None) # 0.1.3 +__version__ = (0, 1, 4, "b", None) # 0.1.4b class BrowserStateError(Exception): pass class LinkNotFoundError(Exception): pass Modified: wwwsearch/mechanize/trunk/setup.py ============================================================================== --- wwwsearch/mechanize/trunk/setup.py (original) +++ wwwsearch/mechanize/trunk/setup.py Thu Oct 12 00:51:33 2006 @@ -52,7 +52,7 @@ ## VERSION_MATCH = re.search(r'__version__ = \((.*)\)', ## open("mechanize/_mechanize.py").read()) ## VERSION = unparse_version(str_to_tuple(VERSION_MATCH.group(1))) -VERSION = "0.1.3" +VERSION = "0.1.4b" INSTALL_REQUIRES = [ "ClientForm>=0.2.2, ==dev", # there's no dependency on BeautifulSoup, but people get confused when From jjlee at codespeak.net Thu Oct 12 01:00:07 2006 From: jjlee at codespeak.net (jjlee at codespeak.net) Date: Thu, 12 Oct 2006 01:00:07 +0200 (CEST) Subject: [wwwsearch-commits] r33204 - wwwsearch/ClientForm/tag/release/0.2.3-2006-10-11T22:57:46 Message-ID: <20061011230007.6AC56101B3@code0.codespeak.net> Author: jjlee Date: Thu Oct 12 01:00:05 2006 New Revision: 33204 Added: 
wwwsearch/ClientForm/tag/release/0.2.3-2006-10-11T22:57:46/ - copied from r33203, wwwsearch/ClientForm/trunk/ Modified: wwwsearch/ClientForm/tag/release/0.2.3-2006-10-11T22:57:46/setup.cfg Log: Tagged ClientForm (trunk -r HEAD, from working copy) release 0.2.3-2006-10-11T22:57:46 Modified: wwwsearch/ClientForm/tag/release/0.2.3-2006-10-11T22:57:46/setup.cfg ============================================================================== --- wwwsearch/ClientForm/trunk/setup.cfg (original) +++ wwwsearch/ClientForm/tag/release/0.2.3-2006-10-11T22:57:46/setup.cfg Thu Oct 12 01:00:05 2006 @@ -1,3 +0,0 @@ -[egg_info] -tag_build = .dev -tag_svn_revision = 1 From jjlee at codespeak.net Thu Oct 12 01:15:47 2006 From: jjlee at codespeak.net (jjlee at codespeak.net) Date: Thu, 12 Oct 2006 01:15:47 +0200 (CEST) Subject: [wwwsearch-commits] r33205 - wwwsearch/ClientForm/trunk Message-ID: <20061011231547.7167E101B3@code0.codespeak.net> Author: jjlee Date: Thu Oct 12 01:15:45 2006 New Revision: 33205 Added: wwwsearch/ClientForm/trunk/ez_setup.py Modified: wwwsearch/ClientForm/trunk/ (props changed) Log: Update ez_setup bootstrap Added: wwwsearch/ClientForm/trunk/ez_setup.py ============================================================================== --- (empty file) +++ wwwsearch/ClientForm/trunk/ez_setup.py Thu Oct 12 01:15:45 2006 @@ -0,0 +1,222 @@ +#!python +"""Bootstrap setuptools installation + +If you want to use setuptools in your package's setup.py, just include this +file in the same directory with it, and add this to the top of your setup.py:: + + from ez_setup import use_setuptools + use_setuptools() + +If you want to require a specific version of setuptools, set a download +mirror, or use an alternate download directory, you can do so by supplying +the appropriate options to ``use_setuptools()``. + +This file can also be run as a script to install or upgrade setuptools. 
+""" +import sys +DEFAULT_VERSION = "0.6c3" +DEFAULT_URL = "http://cheeseshop.python.org/packages/%s/s/setuptools/" % sys.version[:3] + +md5_data = { + 'setuptools-0.6b1-py2.3.egg': '8822caf901250d848b996b7f25c6e6ca', + 'setuptools-0.6b1-py2.4.egg': 'b79a8a403e4502fbb85ee3f1941735cb', + 'setuptools-0.6b2-py2.3.egg': '5657759d8a6d8fc44070a9d07272d99b', + 'setuptools-0.6b2-py2.4.egg': '4996a8d169d2be661fa32a6e52e4f82a', + 'setuptools-0.6b3-py2.3.egg': 'bb31c0fc7399a63579975cad9f5a0618', + 'setuptools-0.6b3-py2.4.egg': '38a8c6b3d6ecd22247f179f7da669fac', + 'setuptools-0.6b4-py2.3.egg': '62045a24ed4e1ebc77fe039aa4e6f7e5', + 'setuptools-0.6b4-py2.4.egg': '4cb2a185d228dacffb2d17f103b3b1c4', + 'setuptools-0.6c1-py2.3.egg': 'b3f2b5539d65cb7f74ad79127f1a908c', + 'setuptools-0.6c1-py2.4.egg': 'b45adeda0667d2d2ffe14009364f2a4b', + 'setuptools-0.6c2-py2.3.egg': 'f0064bf6aa2b7d0f3ba0b43f20817c27', + 'setuptools-0.6c2-py2.4.egg': '616192eec35f47e8ea16cd6a122b7277', + 'setuptools-0.6c3-py2.3.egg': 'f181fa125dfe85a259c9cd6f1d7b78fa', + 'setuptools-0.6c3-py2.4.egg': 'e0ed74682c998bfb73bf803a50e7b71e', + 'setuptools-0.6c3-py2.5.egg': 'abef16fdd61955514841c7c6bd98965e', +} + +import sys, os + +def _validate_md5(egg_name, data): + if egg_name in md5_data: + from md5 import md5 + digest = md5(data).hexdigest() + if digest != md5_data[egg_name]: + print >>sys.stderr, ( + "md5 validation of %s failed! (Possible download problem?)" + % egg_name + ) + sys.exit(2) + return data + + +def use_setuptools( + version=DEFAULT_VERSION, download_base=DEFAULT_URL, to_dir=os.curdir, + download_delay=15 +): + """Automatically find/download setuptools and make it available on sys.path + + `version` should be a valid setuptools version number that is available + as an egg for download under the `download_base` URL (which should end with + a '/'). `to_dir` is the directory where setuptools will be downloaded, if + it is not already available. 
If `download_delay` is specified, it should + be the number of seconds that will be paused before initiating a download, + should one be required. If an older version of setuptools is installed, + this routine will print a message to ``sys.stderr`` and raise SystemExit in + an attempt to abort the calling script. + """ + try: + import setuptools + if setuptools.__version__ == '0.0.1': + print >>sys.stderr, ( + "You have an obsolete version of setuptools installed. Please\n" + "remove it from your system entirely before rerunning this script." + ) + sys.exit(2) + except ImportError: + egg = download_setuptools(version, download_base, to_dir, download_delay) + sys.path.insert(0, egg) + import setuptools; setuptools.bootstrap_install_from = egg + + import pkg_resources + try: + pkg_resources.require("setuptools>="+version) + + except pkg_resources.VersionConflict, e: + # XXX could we install in a subprocess here? + print >>sys.stderr, ( + "The required version of setuptools (>=%s) is not available, and\n" + "can't be installed while this script is running. Please install\n" + " a more recent version first.\n\n(Currently using %r)" + ) % (version, e.args[0]) + sys.exit(2) + +def download_setuptools( + version=DEFAULT_VERSION, download_base=DEFAULT_URL, to_dir=os.curdir, + delay = 15 +): + """Download setuptools from a specified location and return its filename + + `version` should be a valid setuptools version number that is available + as an egg for download under the `download_base` URL (which should end + with a '/'). `to_dir` is the directory where the egg will be downloaded. + `delay` is the number of seconds to pause before an actual download attempt. 
+ """ + import urllib2, shutil + egg_name = "setuptools-%s-py%s.egg" % (version,sys.version[:3]) + url = download_base + egg_name + saveto = os.path.join(to_dir, egg_name) + src = dst = None + if not os.path.exists(saveto): # Avoid repeated downloads + try: + from distutils import log + if delay: + log.warn(""" +--------------------------------------------------------------------------- +This script requires setuptools version %s to run (even to display +help). I will attempt to download it for you (from +%s), but +you may need to enable firewall access for this script first. +I will start the download in %d seconds. + +(Note: if this machine does not have network access, please obtain the file + + %s + +and place it in this directory before rerunning this script.) +---------------------------------------------------------------------------""", + version, download_base, delay, url + ); from time import sleep; sleep(delay) + log.warn("Downloading %s", url) + src = urllib2.urlopen(url) + # Read/write all in one block, so we don't create a corrupt file + # if the download is interrupted. 
+ data = _validate_md5(egg_name, src.read()) + dst = open(saveto,"wb"); dst.write(data) + finally: + if src: src.close() + if dst: dst.close() + return os.path.realpath(saveto) + +def main(argv, version=DEFAULT_VERSION): + """Install or upgrade setuptools and EasyInstall""" + + try: + import setuptools + except ImportError: + egg = None + try: + egg = download_setuptools(version, delay=0) + sys.path.insert(0,egg) + from setuptools.command.easy_install import main + return main(list(argv)+[egg]) # we're done here + finally: + if egg and os.path.exists(egg): + os.unlink(egg) + else: + if setuptools.__version__ == '0.0.1': + # tell the user to uninstall obsolete version + use_setuptools(version) + + req = "setuptools>="+version + import pkg_resources + try: + pkg_resources.require(req) + except pkg_resources.VersionConflict: + try: + from setuptools.command.easy_install import main + except ImportError: + from easy_install import main + main(list(argv)+[download_setuptools(delay=0)]) + sys.exit(0) # try to force an exit + else: + if argv: + from setuptools.command.easy_install import main + main(argv) + else: + print "Setuptools version",version,"or greater has been installed." + print '(Run "ez_setup.py -U setuptools" to reinstall or upgrade.)' + + + +def update_md5(filenames): + """Update our built-in md5 registry""" + + import re + from md5 import md5 + + for name in filenames: + base = os.path.basename(name) + f = open(name,'rb') + md5_data[base] = md5(f.read()).hexdigest() + f.close() + + data = [" %r: %r,\n" % it for it in md5_data.items()] + data.sort() + repl = "".join(data) + + import inspect + srcfile = inspect.getsourcefile(sys.modules[__name__]) + f = open(srcfile, 'rb'); src = f.read(); f.close() + + match = re.search("\nmd5_data = {\n([^}]+)}", src) + if not match: + print >>sys.stderr, "Internal error!" 
+ sys.exit(2) + + src = src[:match.start(1)] + repl + src[match.end(1):] + f = open(srcfile,'w') + f.write(src) + f.close() + + +if __name__=='__main__': + if len(sys.argv)>2 and sys.argv[1]=='--md5update': + update_md5(sys.argv[2:]) + else: + main(sys.argv[1:]) + + + + + From jjlee at codespeak.net Thu Oct 12 01:17:40 2006 From: jjlee at codespeak.net (jjlee at codespeak.net) Date: Thu, 12 Oct 2006 01:17:40 +0200 (CEST) Subject: [wwwsearch-commits] r33206 - wwwsearch/mechanize/trunk Message-ID: <20061011231740.73FBE101B7@code0.codespeak.net> Author: jjlee Date: Thu Oct 12 01:17:36 2006 New Revision: 33206 Added: wwwsearch/mechanize/trunk/ez_setup.py Modified: wwwsearch/mechanize/trunk/ (props changed) Log: Update ez_setup bootstrap Added: wwwsearch/mechanize/trunk/ez_setup.py ============================================================================== --- (empty file) +++ wwwsearch/mechanize/trunk/ez_setup.py Thu Oct 12 01:17:36 2006 @@ -0,0 +1,222 @@ +#!python +"""Bootstrap setuptools installation + +If you want to use setuptools in your package's setup.py, just include this +file in the same directory with it, and add this to the top of your setup.py:: + + from ez_setup import use_setuptools + use_setuptools() + +If you want to require a specific version of setuptools, set a download +mirror, or use an alternate download directory, you can do so by supplying +the appropriate options to ``use_setuptools()``. + +This file can also be run as a script to install or upgrade setuptools. 
+""" +import sys +DEFAULT_VERSION = "0.6c3" +DEFAULT_URL = "http://cheeseshop.python.org/packages/%s/s/setuptools/" % sys.version[:3] + +md5_data = { + 'setuptools-0.6b1-py2.3.egg': '8822caf901250d848b996b7f25c6e6ca', + 'setuptools-0.6b1-py2.4.egg': 'b79a8a403e4502fbb85ee3f1941735cb', + 'setuptools-0.6b2-py2.3.egg': '5657759d8a6d8fc44070a9d07272d99b', + 'setuptools-0.6b2-py2.4.egg': '4996a8d169d2be661fa32a6e52e4f82a', + 'setuptools-0.6b3-py2.3.egg': 'bb31c0fc7399a63579975cad9f5a0618', + 'setuptools-0.6b3-py2.4.egg': '38a8c6b3d6ecd22247f179f7da669fac', + 'setuptools-0.6b4-py2.3.egg': '62045a24ed4e1ebc77fe039aa4e6f7e5', + 'setuptools-0.6b4-py2.4.egg': '4cb2a185d228dacffb2d17f103b3b1c4', + 'setuptools-0.6c1-py2.3.egg': 'b3f2b5539d65cb7f74ad79127f1a908c', + 'setuptools-0.6c1-py2.4.egg': 'b45adeda0667d2d2ffe14009364f2a4b', + 'setuptools-0.6c2-py2.3.egg': 'f0064bf6aa2b7d0f3ba0b43f20817c27', + 'setuptools-0.6c2-py2.4.egg': '616192eec35f47e8ea16cd6a122b7277', + 'setuptools-0.6c3-py2.3.egg': 'f181fa125dfe85a259c9cd6f1d7b78fa', + 'setuptools-0.6c3-py2.4.egg': 'e0ed74682c998bfb73bf803a50e7b71e', + 'setuptools-0.6c3-py2.5.egg': 'abef16fdd61955514841c7c6bd98965e', +} + +import sys, os + +def _validate_md5(egg_name, data): + if egg_name in md5_data: + from md5 import md5 + digest = md5(data).hexdigest() + if digest != md5_data[egg_name]: + print >>sys.stderr, ( + "md5 validation of %s failed! (Possible download problem?)" + % egg_name + ) + sys.exit(2) + return data + + +def use_setuptools( + version=DEFAULT_VERSION, download_base=DEFAULT_URL, to_dir=os.curdir, + download_delay=15 +): + """Automatically find/download setuptools and make it available on sys.path + + `version` should be a valid setuptools version number that is available + as an egg for download under the `download_base` URL (which should end with + a '/'). `to_dir` is the directory where setuptools will be downloaded, if + it is not already available. 
If `download_delay` is specified, it should + be the number of seconds that will be paused before initiating a download, + should one be required. If an older version of setuptools is installed, + this routine will print a message to ``sys.stderr`` and raise SystemExit in + an attempt to abort the calling script. + """ + try: + import setuptools + if setuptools.__version__ == '0.0.1': + print >>sys.stderr, ( + "You have an obsolete version of setuptools installed. Please\n" + "remove it from your system entirely before rerunning this script." + ) + sys.exit(2) + except ImportError: + egg = download_setuptools(version, download_base, to_dir, download_delay) + sys.path.insert(0, egg) + import setuptools; setuptools.bootstrap_install_from = egg + + import pkg_resources + try: + pkg_resources.require("setuptools>="+version) + + except pkg_resources.VersionConflict, e: + # XXX could we install in a subprocess here? + print >>sys.stderr, ( + "The required version of setuptools (>=%s) is not available, and\n" + "can't be installed while this script is running. Please install\n" + " a more recent version first.\n\n(Currently using %r)" + ) % (version, e.args[0]) + sys.exit(2) + +def download_setuptools( + version=DEFAULT_VERSION, download_base=DEFAULT_URL, to_dir=os.curdir, + delay = 15 +): + """Download setuptools from a specified location and return its filename + + `version` should be a valid setuptools version number that is available + as an egg for download under the `download_base` URL (which should end + with a '/'). `to_dir` is the directory where the egg will be downloaded. + `delay` is the number of seconds to pause before an actual download attempt. 
+ """ + import urllib2, shutil + egg_name = "setuptools-%s-py%s.egg" % (version,sys.version[:3]) + url = download_base + egg_name + saveto = os.path.join(to_dir, egg_name) + src = dst = None + if not os.path.exists(saveto): # Avoid repeated downloads + try: + from distutils import log + if delay: + log.warn(""" +--------------------------------------------------------------------------- +This script requires setuptools version %s to run (even to display +help). I will attempt to download it for you (from +%s), but +you may need to enable firewall access for this script first. +I will start the download in %d seconds. + +(Note: if this machine does not have network access, please obtain the file + + %s + +and place it in this directory before rerunning this script.) +---------------------------------------------------------------------------""", + version, download_base, delay, url + ); from time import sleep; sleep(delay) + log.warn("Downloading %s", url) + src = urllib2.urlopen(url) + # Read/write all in one block, so we don't create a corrupt file + # if the download is interrupted. 
+ data = _validate_md5(egg_name, src.read()) + dst = open(saveto,"wb"); dst.write(data) + finally: + if src: src.close() + if dst: dst.close() + return os.path.realpath(saveto) + +def main(argv, version=DEFAULT_VERSION): + """Install or upgrade setuptools and EasyInstall""" + + try: + import setuptools + except ImportError: + egg = None + try: + egg = download_setuptools(version, delay=0) + sys.path.insert(0,egg) + from setuptools.command.easy_install import main + return main(list(argv)+[egg]) # we're done here + finally: + if egg and os.path.exists(egg): + os.unlink(egg) + else: + if setuptools.__version__ == '0.0.1': + # tell the user to uninstall obsolete version + use_setuptools(version) + + req = "setuptools>="+version + import pkg_resources + try: + pkg_resources.require(req) + except pkg_resources.VersionConflict: + try: + from setuptools.command.easy_install import main + except ImportError: + from easy_install import main + main(list(argv)+[download_setuptools(delay=0)]) + sys.exit(0) # try to force an exit + else: + if argv: + from setuptools.command.easy_install import main + main(argv) + else: + print "Setuptools version",version,"or greater has been installed." + print '(Run "ez_setup.py -U setuptools" to reinstall or upgrade.)' + + + +def update_md5(filenames): + """Update our built-in md5 registry""" + + import re + from md5 import md5 + + for name in filenames: + base = os.path.basename(name) + f = open(name,'rb') + md5_data[base] = md5(f.read()).hexdigest() + f.close() + + data = [" %r: %r,\n" % it for it in md5_data.items()] + data.sort() + repl = "".join(data) + + import inspect + srcfile = inspect.getsourcefile(sys.modules[__name__]) + f = open(srcfile, 'rb'); src = f.read(); f.close() + + match = re.search("\nmd5_data = {\n([^}]+)}", src) + if not match: + print >>sys.stderr, "Internal error!" 
+ sys.exit(2) + + src = src[:match.start(1)] + repl + src[match.end(1):] + f = open(srcfile,'w') + f.write(src) + f.close() + + +if __name__=='__main__': + if len(sys.argv)>2 and sys.argv[1]=='--md5update': + update_md5(sys.argv[2:]) + else: + main(sys.argv[1:]) + + + + + From jjlee at codespeak.net Thu Oct 12 01:30:41 2006 From: jjlee at codespeak.net (jjlee at codespeak.net) Date: Thu, 12 Oct 2006 01:30:41 +0200 (CEST) Subject: [wwwsearch-commits] r33207 - wwwsearch/ClientForm/common Message-ID: <20061011233041.121EF101AC@code0.codespeak.net> Author: jjlee Date: Thu Oct 12 01:30:39 2006 New Revision: 33207 Modified: wwwsearch/ClientForm/common/ChangeLog Log: Update changelog Modified: wwwsearch/ClientForm/common/ChangeLog ============================================================================== --- wwwsearch/ClientForm/common/ChangeLog (original) +++ wwwsearch/ClientForm/common/ChangeLog Thu Oct 12 01:30:39 2006 @@ -1,6 +1,25 @@ This isn't really in proper GNU ChangeLog format, it just happens to look that way. +2006-10-12 John J Lee + * 0.2.3 release: + * Fix entity reference / character reference handling for + Python 2.5 . + * Nameless list controls are now never successful. + * List controls used to get inappropriately .merge_control()ed + with other controls, or parsing would raise AmbiguityError. + That's fixed now. + * Handle line endings in element content the same way browsers do + (strip exactly one leading linebreaks, if any leading linebreaks + are present) (patch from Benji York). + * Convert TEXTAREA content to DOS line ending convention, again + following the major browsers. + * Allow mechanize to supply URL join / parse / unparse functions, + to allow mechanize to follow RFC 3986, thus fixing some URL + processing bugs. ClientForm should do the same; probably I + should merge the two projects after final mechanize release. + * Doc fixes. 
+ 2006-03-22 John J Lee * 0.2.2 release: * Stop trying to record precise dates in changelog, since that's From jjlee at codespeak.net Thu Oct 12 01:33:14 2006 From: jjlee at codespeak.net (jjlee at codespeak.net) Date: Thu, 12 Oct 2006 01:33:14 +0200 (CEST) Subject: [wwwsearch-commits] r33208 - wwwsearch/ClientForm/tag/release/0.2.3-2006-10-11T22:57:46 Message-ID: <20061011233314.BDD49101AC@code0.codespeak.net> Author: jjlee Date: Thu Oct 12 01:33:12 2006 New Revision: 33208 Removed: wwwsearch/ClientForm/tag/release/0.2.3-2006-10-11T22:57:46/ Log: Remove unused tag From jjlee at codespeak.net Thu Oct 12 01:33:46 2006 From: jjlee at codespeak.net (jjlee at codespeak.net) Date: Thu, 12 Oct 2006 01:33:46 +0200 (CEST) Subject: [wwwsearch-commits] r33209 - wwwsearch/ClientForm/tag/release/0.2.3-2006-10-11T23:31:26 Message-ID: <20061011233346.BBC30101B3@code0.codespeak.net> Author: jjlee Date: Thu Oct 12 01:33:45 2006 New Revision: 33209 Added: wwwsearch/ClientForm/tag/release/0.2.3-2006-10-11T23:31:26/ - copied from r33208, wwwsearch/ClientForm/trunk/ Modified: wwwsearch/ClientForm/tag/release/0.2.3-2006-10-11T23:31:26/setup.cfg Log: Tagged ClientForm (trunk -r HEAD, from working copy) release 0.2.3-2006-10-11T23:31:26 Modified: wwwsearch/ClientForm/tag/release/0.2.3-2006-10-11T23:31:26/setup.cfg ============================================================================== --- wwwsearch/ClientForm/trunk/setup.cfg (original) +++ wwwsearch/ClientForm/tag/release/0.2.3-2006-10-11T23:31:26/setup.cfg Thu Oct 12 01:33:45 2006 @@ -1,3 +0,0 @@ -[egg_info] -tag_build = .dev -tag_svn_revision = 1 From jjlee at codespeak.net Thu Oct 12 01:36:08 2006 From: jjlee at codespeak.net (jjlee at codespeak.net) Date: Thu, 12 Oct 2006 01:36:08 +0200 (CEST) Subject: [wwwsearch-commits] r33210 - wwwsearch/ClientForm/trunk Message-ID: <20061011233608.22703101B3@code0.codespeak.net> Author: jjlee Date: Thu Oct 12 01:36:05 2006 New Revision: 33210 Modified: wwwsearch/ClientForm/trunk/test.py 
Log: Fix 2.0 incompatibility Modified: wwwsearch/ClientForm/trunk/test.py ============================================================================== --- wwwsearch/ClientForm/trunk/test.py (original) +++ wwwsearch/ClientForm/trunk/test.py Thu Oct 12 01:36:05 2006 @@ -3068,7 +3068,7 @@ class FunctionTests(TestCase): def test_normalize_line_endings(self): - def check(text, expected): + def check(text, expected, self=self): got = ClientForm.normalize_line_endings(text) self.assertEqual(got, expected) From jjlee at codespeak.net Thu Oct 12 01:36:40 2006 From: jjlee at codespeak.net (jjlee at codespeak.net) Date: Thu, 12 Oct 2006 01:36:40 +0200 (CEST) Subject: [wwwsearch-commits] r33211 - wwwsearch/ClientForm/tag/release/0.2.3-2006-10-11T23:31:26 Message-ID: <20061011233640.65824101B7@code0.codespeak.net> Author: jjlee Date: Thu Oct 12 01:36:36 2006 New Revision: 33211 Removed: wwwsearch/ClientForm/tag/release/0.2.3-2006-10-11T23:31:26/ Log: Remove unused tag From jjlee at codespeak.net Thu Oct 12 01:36:56 2006 From: jjlee at codespeak.net (jjlee at codespeak.net) Date: Thu, 12 Oct 2006 01:36:56 +0200 (CEST) Subject: [wwwsearch-commits] r33212 - wwwsearch/ClientForm/tag/release/0.2.3-2006-10-11T23:34:34 Message-ID: <20061011233656.D248E101BC@code0.codespeak.net> Author: jjlee Date: Thu Oct 12 01:36:53 2006 New Revision: 33212 Added: wwwsearch/ClientForm/tag/release/0.2.3-2006-10-11T23:34:34/ - copied from r33211, wwwsearch/ClientForm/trunk/ Modified: wwwsearch/ClientForm/tag/release/0.2.3-2006-10-11T23:34:34/setup.cfg Log: Tagged ClientForm (trunk -r HEAD, from working copy) release 0.2.3-2006-10-11T23:34:34 Modified: wwwsearch/ClientForm/tag/release/0.2.3-2006-10-11T23:34:34/setup.cfg ============================================================================== --- wwwsearch/ClientForm/trunk/setup.cfg (original) +++ wwwsearch/ClientForm/tag/release/0.2.3-2006-10-11T23:34:34/setup.cfg Thu Oct 12 01:36:53 2006 @@ -1,3 +0,0 @@ -[egg_info] -tag_build = .dev 
-tag_svn_revision = 1 From jjlee at codespeak.net Sat Oct 14 01:04:11 2006 From: jjlee at codespeak.net (jjlee at codespeak.net) Date: Sat, 14 Oct 2006 01:04:11 +0200 (CEST) Subject: [wwwsearch-commits] r33276 - wwwsearch/ClientForm/trunk Message-ID: <20061013230411.4A475100ED@code0.codespeak.net> Author: jjlee Date: Sat Oct 14 01:04:09 2006 New Revision: 33276 Modified: wwwsearch/ClientForm/trunk/MANIFEST.in Log: Fix manifest (was giving a warning when installing egg) Modified: wwwsearch/ClientForm/trunk/MANIFEST.in ============================================================================== --- wwwsearch/ClientForm/trunk/MANIFEST.in (original) +++ wwwsearch/ClientForm/trunk/MANIFEST.in Sat Oct 14 01:04:09 2006 @@ -10,4 +10,3 @@ include *.py recursive-include testdata *.html recursive-include examples *.dat *.txt *.html *.cgi *.py -recursive-include ez_setup *.py From jjlee at codespeak.net Sat Oct 14 01:04:30 2006 From: jjlee at codespeak.net (jjlee at codespeak.net) Date: Sat, 14 Oct 2006 01:04:30 +0200 (CEST) Subject: [wwwsearch-commits] r33277 - wwwsearch/ClientForm/tag/release/0.2.3-2006-10-13T23:02:10 Message-ID: <20061013230430.1B2EE100F0@code0.codespeak.net> Author: jjlee Date: Sat Oct 14 01:04:29 2006 New Revision: 33277 Added: wwwsearch/ClientForm/tag/release/0.2.3-2006-10-13T23:02:10/ - copied from r33276, wwwsearch/ClientForm/trunk/ Modified: wwwsearch/ClientForm/tag/release/0.2.3-2006-10-13T23:02:10/setup.cfg Log: Tagged ClientForm (trunk -r HEAD, from working copy) release 0.2.3-2006-10-13T23:02:10 Modified: wwwsearch/ClientForm/tag/release/0.2.3-2006-10-13T23:02:10/setup.cfg ============================================================================== --- wwwsearch/ClientForm/trunk/setup.cfg (original) +++ wwwsearch/ClientForm/tag/release/0.2.3-2006-10-13T23:02:10/setup.cfg Sat Oct 14 01:04:29 2006 @@ -1,3 +0,0 @@ -[egg_info] -tag_build = .dev -tag_svn_revision = 1 From jjlee at codespeak.net Sat Oct 14 01:06:33 2006 From: jjlee at codespeak.net 
(jjlee at codespeak.net) Date: Sat, 14 Oct 2006 01:06:33 +0200 (CEST) Subject: [wwwsearch-commits] r33279 - wwwsearch/ClientForm/tag/release/0.2.3-2006-10-11T23:34:34 Message-ID: <20061013230633.843AA100F4@code0.codespeak.net> Author: jjlee Date: Sat Oct 14 01:06:32 2006 New Revision: 33279 Removed: wwwsearch/ClientForm/tag/release/0.2.3-2006-10-11T23:34:34/ Log: Remove unused tag From jjlee at codespeak.net Sat Oct 14 19:16:27 2006 From: jjlee at codespeak.net (jjlee at codespeak.net) Date: Sat, 14 Oct 2006 19:16:27 +0200 (CEST) Subject: [wwwsearch-commits] r33284 - in wwwsearch/mechanize/trunk: . mechanize Message-ID: <20061014171627.5545B100F8@code0.codespeak.net> Author: jjlee Date: Sat Oct 14 19:16:24 2006 New Revision: 33284 Modified: wwwsearch/mechanize/trunk/0.1-changes.txt wwwsearch/mechanize/trunk/README.html.in wwwsearch/mechanize/trunk/functional_tests.py wwwsearch/mechanize/trunk/mechanize/__init__.py wwwsearch/mechanize/trunk/mechanize/_mechanize.py wwwsearch/mechanize/trunk/mechanize/_response.py wwwsearch/mechanize/trunk/mechanize/_useragent.py Log: Reinstate set_seekable_responses() (had to create an additional class and change the base class of Browser to do so). Functional tests not run since SF is down :-( Modified: wwwsearch/mechanize/trunk/0.1-changes.txt ============================================================================== --- wwwsearch/mechanize/trunk/0.1-changes.txt (original) +++ wwwsearch/mechanize/trunk/0.1-changes.txt Sat Oct 14 19:16:24 2006 @@ -52,7 +52,9 @@ - mechanize.Browser.default_encoding is gone. - mechanize.Browser.set_seekable_responses() is gone (they're always - .seek()able). + .seek()able). Browser and UserAgent now both inherit from + mechanize.UserAgentBase, and UserAgent is now there only to add the + single method .set_seekable_responses(). - Added Browser.encoding(). 
Modified: wwwsearch/mechanize/trunk/README.html.in ============================================================================== --- wwwsearch/mechanize/trunk/README.html.in (original) +++ wwwsearch/mechanize/trunk/README.html.in Sat Oct 14 19:16:24 2006 @@ -42,16 +42,18 @@
      • mechanize.Browser is a subclass of - mechanize.UserAgent, which is, in turn, a subclass of + mechanize.UserAgentBase, which is, in turn, a subclass of urllib2.OpenerDirector (in fact, of mechanize.OpenerDirector), so:
        • any URL can be opened, not just http: -
        • mechanize.UserAgent offers easy dynamic configuration of - user-agent features like protocol, cookie, redirection and - robots.txt handling, without having to make a new - OpenerDirector each time, e.g. by calling - build_opener(). + +
        • mechanize.UserAgentBase offers easy dynamic + configuration of user-agent features like protocol, cookie, + redirection and robots.txt handling, without having + to make a new OpenerDirector each time, e.g. by + calling build_opener(). +
      • Easy HTML form filling, using ClientForm interface. @@ -181,6 +183,21 @@ way. + +

        UserAgent vs UserAgentBase

        + +mechanize.UserAgent is a trivial subclass of +mechanize.UserAgentBase, adding just one method, +.set_seekable_responses(), which allows switching off the +addition of the .seek() method to response objects: + +@{colorize(""" +import mechanize +response = mechanize.urlopen("http://www.example.com/") +print response.read() +""")} + +

        Compatibility

        Modified: wwwsearch/mechanize/trunk/functional_tests.py ============================================================================== --- wwwsearch/mechanize/trunk/functional_tests.py (original) +++ wwwsearch/mechanize/trunk/functional_tests.py Sat Oct 14 19:16:24 2006 @@ -82,6 +82,18 @@ test_state(self.browser) self.assert_("GeneralFAQ.html" in r.read(2048)) + def test_non_seekable(self): + # check everything still works without response_seek_wrapper and + # the .seek() method on response objects + ua = mechanize.UserAgent() + ua.set_seekable_responses(False) + ua.set_handle_equiv(False) + ua._maybe_reindex_handlers() + response = ua.open('http://wwwsearch.sourceforge.net/') + self.failIf(hasattr(response, "seek")) + data = response.read() + self.assert_("Python bits" in data) + class ResponseTests(TestCase): Modified: wwwsearch/mechanize/trunk/mechanize/__init__.py ============================================================================== --- wwwsearch/mechanize/trunk/mechanize/__init__.py (original) +++ wwwsearch/mechanize/trunk/mechanize/__init__.py Sat Oct 14 19:16:24 2006 @@ -66,6 +66,7 @@ 'USE_BARE_EXCEPT', 'UnknownHandler', 'UserAgent', + 'UserAgentBase', 'XHTMLCompatibleHeadParser', '__version__', 'build_opener', @@ -86,7 +87,7 @@ BrowserStateError, LinkNotFoundError, FormNotFoundError # configurable URL-opener interface -from _useragent import UserAgent +from _useragent import UserAgentBase, UserAgent from _html import \ Link, \ Factory, DefaultFactory, RobustFactory, \ Modified: wwwsearch/mechanize/trunk/mechanize/_mechanize.py ============================================================================== --- wwwsearch/mechanize/trunk/mechanize/_mechanize.py (original) +++ wwwsearch/mechanize/trunk/mechanize/_mechanize.py Sat Oct 14 19:16:24 2006 @@ -11,7 +11,7 @@ import urllib2, sys, copy, re -from _useragent import UserAgent +from _useragent import UserAgentBase from _html import DefaultFactory from _response import response_seek_wrapper, 
closeable_response import _upgrade @@ -53,7 +53,7 @@ del self._history[:] -class Browser(UserAgent): +class Browser(UserAgentBase): """Browser-like class with support for history, forms and links. BrowserStateError is raised whenever the browser is in the wrong state to @@ -68,9 +68,9 @@ """ - handler_classes = UserAgent.handler_classes.copy() + handler_classes = UserAgentBase.handler_classes.copy() handler_classes["_response_upgrade"] = _upgrade.ResponseUpgradeProcessor - default_others = copy.copy(UserAgent.default_others) + default_others = copy.copy(UserAgentBase.default_others) default_others.append("_response_upgrade") def __init__(self, @@ -83,8 +83,8 @@ Only named arguments should be passed to this constructor. factory: object implementing the mechanize.Factory interface. - history: object implementing the mechanize.History interface. Note this - interface is still experimental and may change in future. + history: object implementing the mechanize.History interface. Note + this interface is still experimental and may change in future. request_class: Request class to use. Defaults to mechanize.Request by default for Pythons older than 2.4, urllib2.Request otherwise. 
@@ -116,10 +116,11 @@ self.request = None self.set_response(None) - UserAgent.__init__(self) # do this last to avoid __getattr__ problems + # do this last to avoid __getattr__ problems + UserAgentBase.__init__(self) def close(self): - UserAgent.close(self) + UserAgentBase.close(self) if self._response is not None: self._response.close() if self._history is not None: @@ -164,7 +165,8 @@ # relative URL if self._response is None: raise BrowserStateError( - "can't fetch relative reference: not viewing any document") + "can't fetch relative reference: " + "not viewing any document") url = _rfc3986.urljoin(self._response.geturl(), url) request = self._request(url, data, visit) @@ -178,12 +180,13 @@ if self.request is not None and update_history: self._history.add(self.request, self._response) self._response = None - # we want self.request to be assigned even if UserAgent.open fails + # we want self.request to be assigned even if UserAgentBase.open + # fails self.request = request success = True try: - response = UserAgent.open(self, request, data) + response = UserAgentBase.open(self, request, data) except urllib2.HTTPError, error: success = False if error.fp is None: # not a response Modified: wwwsearch/mechanize/trunk/mechanize/_response.py ============================================================================== --- wwwsearch/mechanize/trunk/mechanize/_response.py (original) +++ wwwsearch/mechanize/trunk/mechanize/_response.py Sat Oct 14 19:16:24 2006 @@ -1,4 +1,13 @@ -"""(Mostly HTTP) response classes. +"""Response classes. + +The seek_wrapper code is not used if you're using UserAgent with +.set_seekable_responses(False), or if you're using the urllib2-level interface +without SeekableProcessor or HTTPEquivProcessor. Class closeable_response is +instantiated by some handlers (AbstractHTTPHandler), but the closeable_response +interface is only depended upon by Browser-level code. 
Function +upgrade_response is only used if you're using Browser or +ResponseUpgradeProcessor. + Copyright 2006 John J. Lee Modified: wwwsearch/mechanize/trunk/mechanize/_useragent.py ============================================================================== --- wwwsearch/mechanize/trunk/mechanize/_useragent.py (original) +++ wwwsearch/mechanize/trunk/mechanize/_useragent.py Sat Oct 14 19:16:24 2006 @@ -35,18 +35,24 @@ https_request = http_request -class UserAgent(OpenerDirector): +class UserAgentBase(OpenerDirector): """Convenient user-agent class. Do not use .add_handler() to add a handler for something already dealt with by this code. + The only reason at present for the distinction between UserAgent and + UserAgentBase is so that classes that depend on .seek()able responses + (e.g. mechanize.Browser) can inherit from UserAgentBase. The subclass + UserAgent exposes a .set_seekable_responses() method that allows switching + off the adding of a .seek() method to responses. + Public attributes: addheaders: list of (name, value) pairs specifying headers to send with every request, unless they are overridden in the Request instance. - >>> ua = UserAgent() + >>> ua = UserAgentBase() >>> ua.addheaders = [ ... ("User-agent", "Mozilla/5.0 (compatible)"), ... 
("From", "responsible.person@example.com")] @@ -349,3 +355,10 @@ if newhandler is not None: self.add_handler(newhandler) self._ua_handlers[name] = newhandler + + +class UserAgent(UserAgentBase): + + def set_seekable_responses(self, handle): + """Make response objects .seek()able.""" + self._set_handler("_seek", handle) From jjlee at codespeak.net Sat Oct 14 20:52:20 2006 From: jjlee at codespeak.net (jjlee at codespeak.net) Date: Sat, 14 Oct 2006 20:52:20 +0200 (CEST) Subject: [wwwsearch-commits] r33285 - wwwsearch/ClientForm/trunk Message-ID: <20061014185220.80870100F8@code0.codespeak.net> Author: jjlee Date: Sat Oct 14 20:52:18 2006 New Revision: 33285 Modified: wwwsearch/ClientForm/trunk/ClientForm.py Log: Stupid hack to allow mechanize to get ClientForm to use a copy of BeautifulSoup bundled with mechanize (bother, this means I have to release another ClientForm) Modified: wwwsearch/ClientForm/trunk/ClientForm.py ============================================================================== --- wwwsearch/ClientForm/trunk/ClientForm.py (original) +++ wwwsearch/ClientForm/trunk/ClientForm.py Sat Oct 14 20:52:18 2006 @@ -838,13 +838,14 @@ sgmllib.SGMLParser.__init__(self) _AbstractFormParser.__init__(self, entitydefs, encoding) -try: - if sys.version_info[:2] < (2, 2): - raise ImportError # BeautifulSoup uses generators - import BeautifulSoup -except ImportError: - pass -else: + +# sigh, must support mechanize by allowing dynamic creation of classes based on +# its bundled copy of BeautifulSoup (which was necessary because of dependency +# problems) + +def _create_bs_classes(bs, + icbinbs, + ): class _AbstractBSFormParser(_AbstractSgmllibParser): bs_base_class = None def __init__(self, entitydefs=None, encoding=DEFAULT_ENCODING): @@ -854,18 +855,33 @@ _AbstractFormParser.handle_data(self, data) self.bs_base_class.handle_data(self, data) - class RobustFormParser(_AbstractBSFormParser, BeautifulSoup.BeautifulSoup): + class RobustFormParser(_AbstractBSFormParser,
bs): """Tries to be highly tolerant of incorrect HTML.""" - bs_base_class = BeautifulSoup.BeautifulSoup - class NestingRobustFormParser(_AbstractBSFormParser, - BeautifulSoup.ICantBelieveItsBeautifulSoup): + pass + RobustFormParser.bs_base_class = bs + class NestingRobustFormParser(_AbstractBSFormParser, icbinbs): """Tries to be highly tolerant of incorrect HTML. Different from RobustFormParser in that it more often guesses nesting above missing end tags (see BeautifulSoup docs). """ - bs_base_class = BeautifulSoup.ICantBelieveItsBeautifulSoup + pass + NestingRobustFormParser.bs_base_class = icbinbs + + return RobustFormParser, NestingRobustFormParser + +try: + if sys.version_info[:2] < (2, 2): + raise ImportError # BeautifulSoup uses generators + import BeautifulSoup +except ImportError: + pass +else: + RobustFormParser, NestingRobustFormParser = _create_bs_classes( + BeautifulSoup.BeautifulSoup, BeautifulSoup.ICantBelieveItsBeautifulSoup + ) + #FormParser = XHTMLCompatibleFormParser # testing hack #FormParser = RobustFormParser # testing hack From jjlee at codespeak.net Sat Oct 14 21:01:53 2006 From: jjlee at codespeak.net (jjlee at codespeak.net) Date: Sat, 14 Oct 2006 21:01:53 +0200 (CEST) Subject: [wwwsearch-commits] r33287 - wwwsearch/mechanize/trunk Message-ID: <20061014190153.137AC100F8@code0.codespeak.net> Author: jjlee Date: Sat Oct 14 21:01:53 2006 New Revision: 33287 Modified: wwwsearch/mechanize/trunk/README.html.in Log: Add a FAQ Modified: wwwsearch/mechanize/trunk/README.html.in ============================================================================== --- wwwsearch/mechanize/trunk/README.html.in (original) +++ wwwsearch/mechanize/trunk/README.html.in Sat Oct 14 21:01:53 2006 @@ -580,6 +580,11 @@

        FAQs - usage

          +
        • I'm not getting the HTML page I expected to see. +
        • I'm sure this page is HTML, why does mechanize.Browser think otherwise? @{colorize(""" From jjlee at codespeak.net Sat Oct 14 21:29:08 2006 From: jjlee at codespeak.net (jjlee at codespeak.net) Date: Sat, 14 Oct 2006 21:29:08 +0200 (CEST) Subject: [wwwsearch-commits] r33288 - in wwwsearch/mechanize/trunk: mechanize test Message-ID: <20061014192908.91CFF100F8@code0.codespeak.net> Author: jjlee Date: Sat Oct 14 21:29:07 2006 New Revision: 33288 Modified: wwwsearch/mechanize/trunk/mechanize/_http.py wwwsearch/mechanize/trunk/test/test_request.doctest Log: Backport stdlib urllib2 fix: Patch #1542948: fix urllib2 header casing issue. Modified: wwwsearch/mechanize/trunk/mechanize/_http.py ============================================================================== --- wwwsearch/mechanize/trunk/mechanize/_http.py (original) +++ wwwsearch/mechanize/trunk/mechanize/_http.py Sat Oct 14 21:29:07 2006 @@ -634,6 +634,8 @@ # So make sure the connection gets closed after the (only) # request. headers["Connection"] = "close" + headers = dict( + (name.title(), val) for name, val in headers.items()) try: h.request(req.get_method(), req.get_selector(), req.data, headers) r = h.getresponse() Modified: wwwsearch/mechanize/trunk/test/test_request.doctest ============================================================================== --- wwwsearch/mechanize/trunk/test/test_request.doctest (original) +++ wwwsearch/mechanize/trunk/test/test_request.doctest Sat Oct 14 21:29:07 2006 @@ -2,3 +2,65 @@ >>> r = Request("http://example.com/foo#frag") >>> r.get_selector() '/foo' + + +Request Headers Dictionary +-------------------------- + +The Request.headers dictionary is not a documented interface. It should +stay that way, because the complete set of headers are only accessible +through the .get_header(), .has_header(), .header_items() interface. +However, .headers pre-dates those methods, and so real code will be using +the dictionary. 
+ +The introduction in 2.4 of those methods was a mistake for the same reason: +code that previously saw all (urllib2 user)-provided headers in .headers +now sees only a subset (and the function interface is ugly and incomplete). +A better change would have been to replace .headers dict with a dict +subclass (or UserDict.DictMixin instance?) that preserved the .headers +interface and also provided access to the "unredirected" headers. It's +probably too late to fix that, though. + + +Check .capitalize() case normalization: + +>>> url = "http://example.com" +>>> Request(url, headers={"Spam-eggs": "blah"}).headers["Spam-eggs"] +'blah' +>>> Request(url, headers={"spam-EggS": "blah"}).headers["Spam-eggs"] +'blah' + +Currently, Request(url, "Spam-eggs").headers["Spam-Eggs"] raises KeyError, +but that could be changed in future. + + +Request Headers Methods +----------------------- + +Note the case normalization of header names here, to .capitalize()-case. +This should be preserved for backwards-compatibility. (In the HTTP case, +normalization to .title()-case is done by urllib2 before sending headers to +httplib). + +>>> url = "http://example.com" +>>> r = Request(url, headers={"Spam-eggs": "blah"}) +>>> r.has_header("Spam-eggs") +True +>>> r.header_items() +[('Spam-eggs', 'blah')] +>>> r.add_header("Foo-Bar", "baz") +>>> items = r.header_items() +>>> items.sort() +>>> items +[('Foo-bar', 'baz'), ('Spam-eggs', 'blah')] + +Note that e.g. r.has_header("spam-EggS") is currently False, and +r.get_header("spam-EggS") returns None, but that could be changed in +future. 
+ +>>> r.has_header("Not-there") +False +>>> print r.get_header("Not-there") +None +>>> r.get_header("Not-there", "default") +'default' From jjlee at codespeak.net Sat Oct 14 21:39:40 2006 From: jjlee at codespeak.net (jjlee at codespeak.net) Date: Sat, 14 Oct 2006 21:39:40 +0200 (CEST) Subject: [wwwsearch-commits] r33289 - wwwsearch/mechanize/trunk/mechanize Message-ID: <20061014193940.7F927100F9@code0.codespeak.net> Author: jjlee Date: Sat Oct 14 21:39:39 2006 New Revision: 33289 Modified: wwwsearch/mechanize/trunk/mechanize/_http.py Log: Fix 2.3 compat bug Modified: wwwsearch/mechanize/trunk/mechanize/_http.py ============================================================================== --- wwwsearch/mechanize/trunk/mechanize/_http.py (original) +++ wwwsearch/mechanize/trunk/mechanize/_http.py Sat Oct 14 21:39:39 2006 @@ -635,7 +635,7 @@ # request. headers["Connection"] = "close" headers = dict( - (name.title(), val) for name, val in headers.items()) + [(name.title(), val) for name, val in headers.items()]) try: h.request(req.get_method(), req.get_selector(), req.data, headers) r = h.getresponse() From jjlee at codespeak.net Sat Oct 14 21:46:51 2006 From: jjlee at codespeak.net (jjlee at codespeak.net) Date: Sat, 14 Oct 2006 21:46:51 +0200 (CEST) Subject: [wwwsearch-commits] r33290 - wwwsearch/mechanize/trunk/mechanize Message-ID: <20061014194651.6A4DC100F9@code0.codespeak.net> Author: jjlee Date: Sat Oct 14 21:46:50 2006 New Revision: 33290 Modified: wwwsearch/mechanize/trunk/mechanize/_seek.py wwwsearch/mechanize/trunk/mechanize/_upgrade.py Log: In revision 32987, I added tests for responses being None in too many places (processors don't need those tests) Modified: wwwsearch/mechanize/trunk/mechanize/_seek.py ============================================================================== --- wwwsearch/mechanize/trunk/mechanize/_seek.py (original) +++ wwwsearch/mechanize/trunk/mechanize/_seek.py Sat Oct 14 21:46:50 2006 @@ -6,6 +6,6 @@ """Make responses 
seekable.""" def any_response(self, request, response): - if response is not None and not hasattr(response, "seek"): + if not hasattr(response, "seek"): return response_seek_wrapper(response) return response Modified: wwwsearch/mechanize/trunk/mechanize/_upgrade.py ============================================================================== --- wwwsearch/mechanize/trunk/mechanize/_upgrade.py (original) +++ wwwsearch/mechanize/trunk/mechanize/_upgrade.py Sat Oct 14 21:46:50 2006 @@ -29,7 +29,6 @@ # upgrade responses to be .close()able without becoming unusable handler_order = 0 # before anything else def any_response(self, request, response): - if (response is not None and - not hasattr(response, 'closeable_response')): + if not hasattr(response, 'closeable_response'): response = upgrade_response(response) return response From jjlee at codespeak.net Sat Oct 14 21:54:40 2006 From: jjlee at codespeak.net (jjlee at codespeak.net) Date: Sat, 14 Oct 2006 21:54:40 +0200 (CEST) Subject: [wwwsearch-commits] r33291 - wwwsearch/mechanize/trunk Message-ID: <20061014195440.99989100FC@code0.codespeak.net> Author: jjlee Date: Sat Oct 14 21:54:39 2006 New Revision: 33291 Modified: wwwsearch/mechanize/trunk/README.html.in Log: Update todo list Modified: wwwsearch/mechanize/trunk/README.html.in ============================================================================== --- wwwsearch/mechanize/trunk/README.html.in (original) +++ wwwsearch/mechanize/trunk/README.html.in Sat Oct 14 21:54:39 2006 @@ -301,8 +301,6 @@ This is very roughly in order of priority
            -
          • Topological sort for handlers, instead of .handler_order attribute. - Add new build_opener and deprecate the old one?
          • Test .any_response() two handlers case: ordering.
          • Test referer bugs (frags and don't add in redirect unless orig req had Referer) @@ -323,6 +321,7 @@ (HTTP level should still use byte strings, of course).
          • clean_url(): test browser behaviour. I think this is correct... +
          • Use a nicer RFC 3986 join / split / unsplit implementation.
          • Figure out the Right Thing (if such a thing exists) for %-encoding.
          • How do IRIs fit into the world?
          • IDNA -- must read about security stuff first. @@ -334,7 +333,16 @@
          • gzip transfer encoding (there's already a handler for this in mechanize, but it's poorly implemented ATM).
          • proxy.pac parsing (I don't think this needs JS interpretation) -
          +
        • Topological sort for handlers, instead of .handler_order + att