From jjlee at codespeak.net Tue May 22 23:09:13 2007 From: jjlee at codespeak.net (jjlee at codespeak.net) Date: Tue, 22 May 2007 23:09:13 +0200 (CEST) Subject: [wwwsearch-commits] r43567 - wwwsearch/ClientForm/trunk Message-ID: <20070522210913.E518E806B@code0.codespeak.net> Author: jjlee Date: Tue May 22 23:09:12 2007 New Revision: 43567 Modified: wwwsearch/ClientForm/trunk/ClientForm.py wwwsearch/ClientForm/trunk/test.py Log: Don't merge multiple SELECT controls with the same name Modified: wwwsearch/ClientForm/trunk/ClientForm.py ============================================================================== --- wwwsearch/ClientForm/trunk/ClientForm.py (original) +++ wwwsearch/ClientForm/trunk/ClientForm.py Tue May 22 23:09:12 2007 @@ -26,6 +26,10 @@ """ +def hexid(x): + import struct + return hex(struct.unpack('I', struct.pack('i', id(x)))[0]).lower().strip("l") + # XXX # SELECT outside of FORM loses last OPTION # add an __all__ @@ -1120,8 +1124,8 @@ for ii in range(len(controls)): type, name, attrs = controls[ii] # index=ii*10 allows ImageControl to return multiple ordered pairs - form.new_control(type, name, attrs, select_default=select_default, - index=ii*10) + form.new_control( + type, name, attrs, select_default=select_default, index=ii*10) forms.append(form) for form in forms: form.fixup() @@ -1760,6 +1764,7 @@ self.disabled = False self.readonly = False self.id = attrs.get("id") + self._closed = False # As Controls are merged in with .merge_control(), self.attrs will # refer to each Control in turn -- always the most recently merged @@ -1988,6 +1993,9 @@ "control.get(...).attrs") return self._get(name, by_label, nr).attrs + def close_control(self): + self._closed = True + def add_to_form(self, form): assert self._form is None or form == self._form, ( "can't add control to more than one form") @@ -1996,12 +2004,16 @@ # always count nameless elements as separate controls Control.add_to_form(self, form) else: - try: - control = form.find_control(self.name, 
self.type) - except (ControlNotFoundError, AmbiguityError): - Control.add_to_form(self, form) + for ii in range(len(form.controls)-1, -1, -1): + control = form.controls[ii] + if control.name == self.name and control.type == self.type: + if control._closed: + Control.add_to_form(self, form) + else: + control.merge_control(self) + break else: - control.merge_control(self) + Control.add_to_form(self, form) def merge_control(self, control): assert bool(control.multiple) == bool(self.multiple) @@ -2820,6 +2832,14 @@ control = klass(type, name, a, select_default, index) else: control = klass(type, name, a, index) + + if type == "select" and len(attrs) == 1: + for ii in range(len(self.controls)-1, -1, -1): + ctl = self.controls[ii] + if ctl.type == "select": + ctl.close_control() + break + control.add_to_form(self) control._urlparse = self._urlparse control._urlunparse = self._urlunparse Modified: wwwsearch/ClientForm/trunk/test.py ============================================================================== --- wwwsearch/ClientForm/trunk/test.py (original) +++ wwwsearch/ClientForm/trunk/test.py Tue May 22 23:09:12 2007 @@ -758,6 +758,36 @@ ctl = forms[0].find_control(type="textarea") self.assertEqual(ctl.value, "\r\nblah\r\n") +## # XXXX missing form elem --> complains about nested selects! (global form trouble?) + + def test_double_select(self): + # More than one SELECT control of the same name in a form never + # represent a single control (unlike RADIO and CHECKBOX elements), so + # don't merge them. + forms = ClientForm.ParseFile( + StringIO("""\ +
+ + +
+"""), + "http://example.com/", + backwards_compat=False, + ) + form = forms[0] + self.assertEquals(len(form.controls), 2) + ctl = form.find_control(name="a", nr=0) + self.assertEqual([item.name for item in ctl.items], ["b", "c"]) + ctl = form.find_control(name="a", nr=1) + self.assertEqual([item.name for item in ctl.items], ["d", "e"]) + + class DisabledTests(TestCase): def testOptgroup(self): From jjlee at codespeak.net Thu May 24 22:29:42 2007 From: jjlee at codespeak.net (jjlee at codespeak.net) Date: Thu, 24 May 2007 22:29:42 +0200 (CEST) Subject: [wwwsearch-commits] r43607 - wwwsearch/ClientForm/trunk Message-ID: <20070524202942.E4C1F80A4@code0.codespeak.net> Author: jjlee Date: Thu May 24 22:29:42 2007 New Revision: 43607 Modified: wwwsearch/ClientForm/trunk/ClientForm.py wwwsearch/ClientForm/trunk/test.py Log: Fix ParseError affecting global SELECT and TEXTAREA controls Modified: wwwsearch/ClientForm/trunk/ClientForm.py ============================================================================== --- wwwsearch/ClientForm/trunk/ClientForm.py (original) +++ wwwsearch/ClientForm/trunk/ClientForm.py Thu May 24 22:29:42 2007 @@ -31,7 +31,6 @@ return hex(struct.unpack('I', struct.pack('i', id(x)))[0]).lower().strip("l") # XXX -# SELECT outside of FORM loses last OPTION # add an __all__ # Remove parser testing hack # safeUrl()-ize action @@ -547,8 +546,6 @@ def end_select(self): debug("") - if self._current_form is self._global_form: - return if self._select is None: raise ParseError("end of SELECT before start") @@ -628,8 +625,6 @@ def end_textarea(self): debug("") - if self._current_form is self._global_form: - return if self._textarea is None: raise ParseError("end of TEXTAREA before start") controls = self._current_form[2] Modified: wwwsearch/ClientForm/trunk/test.py ============================================================================== --- wwwsearch/ClientForm/trunk/test.py (original) +++ wwwsearch/ClientForm/trunk/test.py Thu May 24 22:29:42 2007 @@ 
-758,8 +758,6 @@ ctl = forms[0].find_control(type="textarea") self.assertEqual(ctl.value, "\r\nblah\r\n") -## # XXXX missing form elem --> complains about nested selects! (global form trouble?) - def test_double_select(self): # More than one SELECT control of the same name in a form never # represent a single control (unlike RADIO and CHECKBOX elements), so @@ -787,6 +785,32 @@ ctl = form.find_control(name="a", nr=1) self.assertEqual([item.name for item in ctl.items], ["d", "e"]) + def test_global_select(self): + # regression test: closing select and textarea tags should not be + # ignored, causing a ParseError due to incorrect tag nesting + + forms = ClientForm.ParseFileEx( + StringIO("""\ + + +"""), + "http://example.com/", + ) + + forms = ClientForm.ParseFile( + StringIO("""\ + + +"""), + "http://example.com/", + backwards_compat=False, + ) class DisabledTests(TestCase): From jjlee at codespeak.net Thu May 24 22:29:55 2007 From: jjlee at codespeak.net (jjlee at codespeak.net) Date: Thu, 24 May 2007 22:29:55 +0200 (CEST) Subject: [wwwsearch-commits] r43608 - wwwsearch/ClientForm/trunk Message-ID: <20070524202955.DFE2280A4@code0.codespeak.net> Author: jjlee Date: Thu May 24 22:29:55 2007 New Revision: 43608 Modified: wwwsearch/ClientForm/trunk/ClientForm.py Log: Remove stray debugging code Modified: wwwsearch/ClientForm/trunk/ClientForm.py ============================================================================== --- wwwsearch/ClientForm/trunk/ClientForm.py (original) +++ wwwsearch/ClientForm/trunk/ClientForm.py Thu May 24 22:29:55 2007 @@ -26,10 +26,6 @@ """ -def hexid(x): - import struct - return hex(struct.unpack('I', struct.pack('i', id(x)))[0]).lower().strip("l") - # XXX # add an __all__ # Remove parser testing hack From jjlee at codespeak.net Thu May 24 22:34:55 2007 From: jjlee at codespeak.net (jjlee at codespeak.net) Date: Thu, 24 May 2007 22:34:55 +0200 (CEST) Subject: [wwwsearch-commits] r43609 - 
wwwsearch/ClientForm/tag/release/0.2.7-2007-05-24T21:35:32 Message-ID: <20070524203455.E1C34809C@code0.codespeak.net> Author: jjlee Date: Thu May 24 22:34:55 2007 New Revision: 43609 Added: wwwsearch/ClientForm/tag/release/0.2.7-2007-05-24T21:35:32/ - copied from r43608, wwwsearch/ClientForm/trunk/ Modified: wwwsearch/ClientForm/tag/release/0.2.7-2007-05-24T21:35:32/setup.cfg Log: Tagged ClientForm (trunk -r HEAD, from working copy) release 0.2.7-2007-05-24T21:35:32 Modified: wwwsearch/ClientForm/tag/release/0.2.7-2007-05-24T21:35:32/setup.cfg ============================================================================== --- wwwsearch/ClientForm/trunk/setup.cfg (original) +++ wwwsearch/ClientForm/tag/release/0.2.7-2007-05-24T21:35:32/setup.cfg Thu May 24 22:34:55 2007 @@ -1,3 +0,0 @@ -[egg_info] -tag_build = .dev -tag_svn_revision = 1 From jjlee at codespeak.net Fri May 25 20:44:45 2007 From: jjlee at codespeak.net (jjlee at codespeak.net) Date: Fri, 25 May 2007 20:44:45 +0200 (CEST) Subject: [wwwsearch-commits] r43644 - wwwsearch/ClientForm/trunk Message-ID: <20070525184445.89A098092@code0.codespeak.net> Author: jjlee Date: Fri May 25 20:44:44 2007 New Revision: 43644 Modified: wwwsearch/ClientForm/trunk/ClientForm.py Log: Add an __all__ Modified: wwwsearch/ClientForm/trunk/ClientForm.py ============================================================================== --- wwwsearch/ClientForm/trunk/ClientForm.py (original) +++ wwwsearch/ClientForm/trunk/ClientForm.py Fri May 25 20:44:44 2007 @@ -15,7 +15,7 @@ HTML 4.01 Specification, W3C Recommendation 24 December 1999 -Copyright 2002-2006 John J. Lee +Copyright 2002-2007 John J. Lee Copyright 2005 Gary Poster Copyright 2005 Zope Corporation Copyright 1998-2000 Gisle Aas. @@ -27,7 +27,6 @@ """ # XXX -# add an __all__ # Remove parser testing hack # safeUrl()-ize action # Switch to unicode throughout (would be 0.3.x) @@ -57,6 +56,16 @@ # Work on DOMForm. # XForms? Don't know if there's a need here. 
+__all__ = ['AmbiguityError', 'CheckboxControl', 'Control', + 'ControlNotFoundError', 'FileControl', 'FormParser', 'HTMLForm', + 'HiddenControl', 'IgnoreControl', 'ImageControl', 'IsindexControl', + 'Item', 'ItemCountError', 'ItemNotFoundError', 'Label', + 'ListControl', 'LocateError', 'Missing', 'NestingRobustFormParser', + 'ParseError', 'ParseFile', 'ParseFileEx', 'ParseResponse', + 'ParseResponseEx', 'PasswordControl', 'RadioControl', + 'RobustFormParser', 'ScalarControl', 'SelectControl', + 'SubmitButtonControl', 'SubmitControl', 'TextControl', + 'TextareaControl', 'XHTMLCompatibleFormParser'] try: True except NameError: From jjlee at codespeak.net Fri May 25 20:45:50 2007 From: jjlee at codespeak.net (jjlee at codespeak.net) Date: Fri, 25 May 2007 20:45:50 +0200 (CEST) Subject: [wwwsearch-commits] r43645 - wwwsearch/ClientForm/common Message-ID: <20070525184550.33C968092@code0.codespeak.net> Author: jjlee Date: Fri May 25 20:45:49 2007 New Revision: 43645 Modified: wwwsearch/ClientForm/common/ChangeLog Log: Update ChangeLog Modified: wwwsearch/ClientForm/common/ChangeLog ============================================================================== --- wwwsearch/ClientForm/common/ChangeLog (original) +++ wwwsearch/ClientForm/common/ChangeLog Fri May 25 20:45:49 2007 @@ -1,6 +1,16 @@ This isn't really in proper GNU ChangeLog format, it just happens to look that way. +2007-05-24 John J Lee + * 0.2.7 release: + * Fix ParseError affecting global SELECT and TEXTAREA controls + * Fix entity ref double-decoding bug (thanks David Moews and + Bayle Shanks). + * Don't merge multiple SELECT controls with the same name. + * Fix bad use of module warnings. + * Add an __all__ attribute. + * Fix source file line endings in SVN (svn:eol-style native). + 2007-01-07 John J Lee * 0.2.6 release: * Don't allow underlying parser errors (e.g. 
SGMLParseError) From jjlee at codespeak.net Fri May 25 20:46:26 2007 From: jjlee at codespeak.net (jjlee at codespeak.net) Date: Fri, 25 May 2007 20:46:26 +0200 (CEST) Subject: [wwwsearch-commits] r43646 - wwwsearch/ClientForm/tag/release/0.2.7-2007-05-24T21:35:32 Message-ID: <20070525184626.2B35F8092@code0.codespeak.net> Author: jjlee Date: Fri May 25 20:46:25 2007 New Revision: 43646 Removed: wwwsearch/ClientForm/tag/release/0.2.7-2007-05-24T21:35:32/ Log: Remove unused release tag From jjlee at codespeak.net Fri May 25 20:46:43 2007 From: jjlee at codespeak.net (jjlee at codespeak.net) Date: Fri, 25 May 2007 20:46:43 +0200 (CEST) Subject: [wwwsearch-commits] r43647 - wwwsearch/ClientForm/tag/release/0.2.7-2007-05-25T19:47:20 Message-ID: <20070525184643.5D3B38092@code0.codespeak.net> Author: jjlee Date: Fri May 25 20:46:43 2007 New Revision: 43647 Added: wwwsearch/ClientForm/tag/release/0.2.7-2007-05-25T19:47:20/ - copied from r43646, wwwsearch/ClientForm/trunk/ Modified: wwwsearch/ClientForm/tag/release/0.2.7-2007-05-25T19:47:20/setup.cfg Log: Tagged ClientForm (trunk -r HEAD, from working copy) release 0.2.7-2007-05-25T19:47:20 Modified: wwwsearch/ClientForm/tag/release/0.2.7-2007-05-25T19:47:20/setup.cfg ============================================================================== --- wwwsearch/ClientForm/trunk/setup.cfg (original) +++ wwwsearch/ClientForm/tag/release/0.2.7-2007-05-25T19:47:20/setup.cfg Fri May 25 20:46:43 2007 @@ -1,3 +0,0 @@ -[egg_info] -tag_build = .dev -tag_svn_revision = 1 From jjlee at codespeak.net Fri May 25 21:29:17 2007 From: jjlee at codespeak.net (jjlee at codespeak.net) Date: Fri, 25 May 2007 21:29:17 +0200 (CEST) Subject: [wwwsearch-commits] r43651 - wwwsearch/ClientForm/trunk Message-ID: <20070525192917.B08F88092@code0.codespeak.net> Author: jjlee Date: Fri May 25 21:29:16 2007 New Revision: 43651 Modified: wwwsearch/ClientForm/trunk/test.py Log: Revive a bit of accidentally-unreached test code Modified: 
wwwsearch/ClientForm/trunk/test.py ============================================================================== --- wwwsearch/ClientForm/trunk/test.py (original) +++ wwwsearch/ClientForm/trunk/test.py Fri May 25 21:29:16 2007 @@ -1139,25 +1139,27 @@ def testDisabledRadio(self): for compat in False, True: - self._testDisabledCheckbox(compat) + self._testDisabledRadio(compat) def _testDisabledRadio(self, compat): file = StringIO( """
- - - -
""") hide_deprecations() forms = ClientForm.ParseFile(file, "http://localhost/", backwards_compat=compat) form = forms[0] control = form.find_control('foo') - self.assert_(control.disabled) + + # since all items are disabled, .fixup() should not select + # anything + self.assertEquals( + [item.name for item in control.items if item.selected], + [], + ) + reset_deprecations() def testDisabledCheckbox(self): for compat in False, True: From jjlee at codespeak.net Mon May 28 17:22:10 2007 From: jjlee at codespeak.net (jjlee at codespeak.net) Date: Mon, 28 May 2007 17:22:10 +0200 (CEST) Subject: [wwwsearch-commits] r43803 - in wwwsearch/mechanize/trunk: mechanize test Message-ID: <20070528152210.6107C809C@code0.codespeak.net> Author: jjlee Date: Mon May 28 17:22:09 2007 New Revision: 43803 Modified: wwwsearch/mechanize/trunk/mechanize/_http.py wwwsearch/mechanize/trunk/test/test_urllib2.py Log: Redirected robots.txt fetch no longer results in another attempted robots.txt fetch to check the redirection is allowed! Needs revisiting post-stable release. Modified: wwwsearch/mechanize/trunk/mechanize/_http.py ============================================================================== --- wwwsearch/mechanize/trunk/mechanize/_http.py (original) +++ wwwsearch/mechanize/trunk/mechanize/_http.py Mon May 28 17:22:09 2007 @@ -79,12 +79,14 @@ # the same. 
# XXX really refresh redirections should be visiting; tricky to # fix, so this will wait until post-stable release - return Request(newurl, - headers=req.headers, - origin_req_host=req.get_origin_req_host(), - unverifiable=True, - visit=False, - ) + new = Request(newurl, + headers=req.headers, + origin_req_host=req.get_origin_req_host(), + unverifiable=True, + visit=False, + ) + new._origin_req = getattr(req, "_origin_req", req) + return new else: raise HTTPError(req.get_full_url(), code, msg, headers, fp) @@ -412,6 +414,15 @@ return request host = request.get_host() + + # robots.txt requests don't need to be allowed by robots.txt :-) + origin_req = getattr(request, "_origin_req", None) + if (origin_req is not None and + origin_req.get_selector() == "/robots.txt" and + origin_req.get_host() == host + ): + return request + if host != self._host: self.rfp = self.rfp_class() try: Modified: wwwsearch/mechanize/trunk/test/test_urllib2.py ============================================================================== --- wwwsearch/mechanize/trunk/test/test_urllib2.py (original) +++ wwwsearch/mechanize/trunk/test/test_urllib2.py Mon May 28 17:22:09 2007 @@ -806,6 +806,41 @@ h.http_request(req) self.assert_(rfpc.calls == []) + def test_redirected_robots_txt(self): + # redirected robots.txt fetch shouldn't result in another attempted + # robots.txt fetch to check the redirection is allowed! 
+ import mechanize + from mechanize import build_opener, HTTPHandler, \ + HTTPDefaultErrorHandler, HTTPRedirectHandler, \ + HTTPRobotRulesProcessor + + class MockHTTPHandler(mechanize.BaseHandler): + def __init__(self): + self.requests = [] + def http_open(self, req): + import mimetools, httplib, copy + from StringIO import StringIO + self.requests.append(copy.deepcopy(req)) + if req.get_full_url() == "http://example.com/robots.txt": + hdr = "Location: http://example.com/en/robots.txt\r\n\r\n" + msg = mimetools.Message(StringIO(hdr)) + return self.parent.error( + "http", req, test_response(), 302, "Blah", msg) + else: + return test_response("Allow: *", [], req.get_full_url()) + + hh = MockHTTPHandler() + hdeh = HTTPDefaultErrorHandler() + hrh = HTTPRedirectHandler() + rh = HTTPRobotRulesProcessor() + o = build_test_opener(hh, hdeh, hrh, rh) + o.open("http://example.com/") + self.assertEqual([req.get_full_url() for req in hh.requests], + ["http://example.com/robots.txt", + "http://example.com/en/robots.txt", + "http://example.com/", + ]) + def test_cookies(self): cj = MockCookieJar() h = HTTPCookieProcessor(cj) From jjlee at codespeak.net Mon May 28 17:34:40 2007 From: jjlee at codespeak.net (jjlee at codespeak.net) Date: Mon, 28 May 2007 17:34:40 +0200 (CEST) Subject: [wwwsearch-commits] r43808 - in wwwsearch/mechanize/trunk: mechanize test Message-ID: <20070528153440.57C3680A4@code0.codespeak.net> Author: jjlee Date: Mon May 28 17:34:39 2007 New Revision: 43808 Modified: wwwsearch/mechanize/trunk/mechanize/_rfc3986.py wwwsearch/mechanize/trunk/test/test_rfc3986.doctest Log: Fix exception raised by RFC 3986 implementation with urljoin(base, '/..') Modified: wwwsearch/mechanize/trunk/mechanize/_rfc3986.py ============================================================================== --- wwwsearch/mechanize/trunk/mechanize/_rfc3986.py (original) +++ wwwsearch/mechanize/trunk/mechanize/_rfc3986.py Mon May 28 17:34:39 2007 @@ -200,7 +200,8 @@ continue if path == "/..": 
path = "/" - r.pop() + if r: + r.pop() continue # D if path == ".": Modified: wwwsearch/mechanize/trunk/test/test_rfc3986.doctest ============================================================================== --- wwwsearch/mechanize/trunk/test/test_rfc3986.doctest (original) +++ wwwsearch/mechanize/trunk/test/test_rfc3986.doctest Mon May 28 17:34:39 2007 @@ -59,6 +59,10 @@ '/' >>> remove_dot_segments("./") '' +>>> remove_dot_segments("/..") +'/' +>>> remove_dot_segments("/../") +'/' Examples from RFC 3986 section 5.4 @@ -154,3 +158,11 @@ 'http://a/b/c/g#s/../x' >>> join("http:g") 'http://a/b/c/g' + + +Additional urljoin tests, not taken from RFC: + +>>> join("/..") +'http://a/' +>>> join("/../") +'http://a/' From jjlee at codespeak.net Mon May 28 18:11:08 2007 From: jjlee at codespeak.net (jjlee at codespeak.net) Date: Mon, 28 May 2007 18:11:08 +0200 (CEST) Subject: [wwwsearch-commits] r43811 - wwwsearch/mechanize/trunk/mechanize Message-ID: <20070528161108.1CBB68099@code0.codespeak.net> Author: jjlee Date: Mon May 28 18:11:07 2007 New Revision: 43811 Modified: wwwsearch/mechanize/trunk/mechanize/_http.py Log: Add some debugging code to show URL with HTTPErrors (would be nice to always have this, but don't want to change the precise exception raised right now) Modified: wwwsearch/mechanize/trunk/mechanize/_http.py ============================================================================== --- wwwsearch/mechanize/trunk/mechanize/_http.py (original) +++ wwwsearch/mechanize/trunk/mechanize/_http.py Mon May 28 18:11:07 2007 @@ -27,6 +27,12 @@ debug = logging.getLogger("mechanize").debug +# monkeypatch urllib2.HTTPError to show URL +## def urllib2_str(self): +## return 'HTTP Error %s: %s (%s)' % ( +## self.code, self.msg, self.geturl()) +## urllib2.HTTPError.__str__ = urllib2_str + CHUNK = 1024 # size of chunks fed to HTML HEAD parser, in bytes DEFAULT_ENCODING = 'latin-1' From jjlee at codespeak.net Mon May 28 23:06:57 2007 From: jjlee at codespeak.net (jjlee at 
codespeak.net) Date: Mon, 28 May 2007 23:06:57 +0200 (CEST) Subject: [wwwsearch-commits] r43826 - in wwwsearch/mechanize/trunk: . test-tools Message-ID: <20070528210657.442B48095@code0.codespeak.net> Author: jjlee Date: Mon May 28 23:06:56 2007 New Revision: 43826 Added: wwwsearch/mechanize/trunk/test-tools/testprogram.py wwwsearch/mechanize/trunk/test-tools/twisted-localserver.py Modified: wwwsearch/mechanize/trunk/functional_tests.py wwwsearch/mechanize/trunk/test.py Log: Add an -l option to run the functional tests against a local twisted.web2-based server. There are still a few tests that always run against the wwwsearch.sf.net site -- need to extend the local server a bit to cover these. Modified: wwwsearch/mechanize/trunk/functional_tests.py ============================================================================== --- wwwsearch/mechanize/trunk/functional_tests.py (original) +++ wwwsearch/mechanize/trunk/functional_tests.py Mon May 28 23:06:56 2007 @@ -2,7 +2,9 @@ # These tests access the network. -import os +# thanks Moof (aka Giles Antonio Radford) for some of these + +import os, sys from unittest import TestCase import mechanize @@ -11,6 +13,14 @@ HTTPHandler, HTTPRefreshProcessor, \ HTTPEquivProcessor, HTTPRedirectHandler, \ HTTPRedirectDebugProcessor, HTTPResponseDebugProcessor +from mechanize._rfc3986 import urljoin + +# XXX +# document twisted.web2 install (I forgot how I did it -- reinstall!) +# implement remaining stuff used by functional_tests.py +# in twisted-localserver.py: +# - 302 followed by 404 response +# - helper cgi script for cookies &c. 
#from cookielib import CookieJar #from urllib2 import build_opener, install_opener, urlopen @@ -20,8 +30,9 @@ ## import logging ## logger = logging.getLogger("mechanize") -## logger.addHandler(logging.StreamHandler()) -## logger.setLevel(logging.DEBUG) +## logger.addHandler(logging.StreamHandler(sys.stdout)) +## #logger.setLevel(logging.DEBUG) +## logger.setLevel(logging.INFO) def sanepathname2url(path): @@ -39,7 +50,7 @@ self.browser = mechanize.Browser() def test_simple(self): - self.browser.open('http://wwwsearch.sourceforge.net/') + self.browser.open(self.uri) self.assertEqual(self.browser.title(), 'Python bits') # relative URL self.browser.open('/mechanize/') @@ -52,7 +63,7 @@ import urllib2 self.assertRaises( urllib2.HTTPError, - self.browser.open, "http://wwwsearch.sf.net/doesnotexist" + self.browser.open, "http://wwwsearch.sf.net/doesnotexist", ) def test_reread(self): @@ -60,7 +71,7 @@ # be true for e.g. mechanize.OpenerDirector when mechanize's own # handlers are in use, but is guaranteed to be true for # mechanize.Browser) - r = self.browser.open('http://wwwsearch.sourceforge.net/') + r = self.browser.open(self.uri) data = r.read() r.close() r.seek(0) @@ -70,12 +81,12 @@ def test_error_recovery(self): self.assertRaises(OSError, self.browser.open, 'file:///c|thisnoexistyiufheiurgbueirgbue') - self.browser.open('http://wwwsearch.sourceforge.net/') + self.browser.open(self.uri) self.assertEqual(self.browser.title(), 'Python bits') def test_redirect(self): # 301 redirect due to missing final '/' - r = self.browser.open('http://wwwsearch.sourceforge.net/bits') + r = self.browser.open(urljoin(self.uri, "bits")) self.assertEqual(r.code, 200) self.assert_("GeneralFAQ.html" in r.read(2048)) @@ -92,7 +103,7 @@ self.assertRaises(mechanize.BrowserStateError, br.back) test_state(self.browser) # note this involves a redirect, which should itself be non-visiting - r = self.browser.open_novisit("http://wwwsearch.sourceforge.net/bits") + r = 
self.browser.open_novisit(urljoin(self.uri, "bits")) test_state(self.browser) self.assert_("GeneralFAQ.html" in r.read(2048)) @@ -102,7 +113,7 @@ ua = mechanize.UserAgent() ua.set_seekable_responses(False) ua.set_handle_equiv(False) - response = ua.open('http://wwwsearch.sourceforge.net/') + response = ua.open(self.uri) self.failIf(hasattr(response, "seek")) data = response.read() self.assert_("Python bits" in data) @@ -112,7 +123,7 @@ def test_seek(self): br = mechanize.Browser() - r = br.open("http://wwwsearch.sourceforge.net/") + r = br.open(self.uri) html = r.read() r.seek(0) self.assertEqual(r.read(), html) @@ -120,7 +131,7 @@ def test_seekable_response_opener(self): opener = mechanize.OpenerFactory( mechanize.SeekableResponseOpener).build_opener() - r = opener.open("http://wwwsearch.sourceforge.net/bits/cctest2.txt") + r = opener.open(urljoin(self.uri, "bits/cctest2.txt")) r.read() r.seek(0) self.assertEqual(r.read(), @@ -130,11 +141,10 @@ def test_no_seek(self): # should be possible to turn off UserAgent's .seek() functionality def check_no_seek(opener): - r = opener.open( - "http://wwwsearch.sourceforge.net/bits/cctest2.txt") + r = opener.open(urljoin(self.uri, "bits/cctest2.txt")) self.assert_(not hasattr(r, "seek")) try: - opener.open("http://wwwsearch.sourceforge.net/nonexistent") + opener.open(urljoin(self.uri, "nonexistent")) except mechanize.HTTPError, exc: self.assert_(not hasattr(exc, "seek")) @@ -154,13 +164,12 @@ # .seek() method, then raised HTTPError exceptions should also have the # .seek() method def check(opener, excs_also): - r = opener.open( - "http://wwwsearch.sourceforge.net/bits/cctest2.txt") + r = opener.open(urljoin(self.uri, "bits/cctest2.txt")) data = r.read() r.seek(0) self.assertEqual(data, r.read(), r.get_data()) try: - opener.open("http://wwwsearch.sourceforge.net/nonexistent") + opener.open(urljoin(self.uri, "nonexistent")) except mechanize.HTTPError, exc: data = exc.read() if excs_also: @@ -189,7 +198,7 @@ def 
test_set_response(self): br = mechanize.Browser() - r = br.open("http://wwwsearch.sourceforge.net/") + r = br.open(self.uri) html = r.read() self.assertEqual(br.title(), "Python bits") @@ -226,7 +235,7 @@ import pickle b = mechanize.Browser() - r = b.open("http://wwwsearch.sourceforge.net/bits/cctest2.txt") + r = b.open(urljoin(self.uri, "bits/cctest2.txt")) r.read() r.close() @@ -301,11 +310,11 @@ plain_opener = mechanize.build_opener(mechanize.HTTPRobotRulesProcessor) browser = mechanize.Browser() for opener in plain_opener, browser: - r = opener.open("http://wwwsearch.sourceforge.net/robots") + r = opener.open(urljoin(self.uri, "robots")) self.assertEqual(r.code, 200) self.assertRaises( mechanize.RobotExclusionError, - opener.open, "http://wwwsearch.sourceforge.net/norobots") + opener.open, urljoin(self.uri, "norobots")) def test_urlretrieve(self): url = "http://www.python.org/" @@ -341,11 +350,10 @@ def test_reload_read_incomplete(self): from mechanize import Browser browser = Browser() - r1 = browser.open( - "http://wwwsearch.sourceforge.net/bits/mechanize_reload_test.html") + r1 = browser.open(urljoin(self.uri, "bits/mechanize_reload_test.html")) # if we don't do anything and go straight to another page, most of the # last page's response won't be .read()... 
- r2 = browser.open("http://wwwsearch.sourceforge.net/mechanize") + r2 = browser.open(urljoin(self.uri, "mechanize")) self.assert_(len(r1.get_data()) < 4097) # we only .read() a little bit # ...so if we then go back, .follow_link() for a link near the end (a # few kb in, past the point that always gets read in HTML files because @@ -378,5 +386,29 @@ if __name__ == "__main__": - import unittest - unittest.main() + import sys + sys.path.insert(0, "test-tools") + import testprogram + USAGE_EXAMPLES = """ +Examples: + %(progName)s + - run all tests + %(progName)s functional_tests.SimpleTests + - run all 'test*' test methods in class SimpleTests + %(progName)s functional_tests.SimpleTests.test_redirect + - run SimpleTests.test_redirect + + %(progName)s -l + - start a local Twisted HTTP server and run the functional + tests against that, rather than against SourceForge + (quicker!) + Note not all the functional tests use the local server yet + -- some currently always access the internet regardless of + this option and the --uri option. 
+""" + prog = testprogram.TestProgram( + ["functional_tests"], + localServerProcess=testprogram.TwistedServerProcess(), + usageExamples=USAGE_EXAMPLES, + ) + result = prog.runTests() Added: wwwsearch/mechanize/trunk/test-tools/testprogram.py ============================================================================== --- (empty file) +++ wwwsearch/mechanize/trunk/test-tools/testprogram.py Mon May 28 23:06:56 2007 @@ -0,0 +1,311 @@ +"""Local server and cgitb support.""" + +import cgitb +#cgitb.enable(format="text") + +import sys, os, traceback, logging, glob, time +from unittest import defaultTestLoader, TextTestRunner, TestSuite, TestCase, \ + _TextTestResult + + +class ServerProcess: + + def __init__(self, filename, name=None): + if filename is None: + raise ValueError('filename arg must be a string') + if name is None: + name = filename + self.name = os.path.basename(name) + self.port = None + self.report_hook = lambda msg: None + self._filename = filename + + def _get_args(self): + """Return list of command line arguments. + + Override me. 
+ """ + return [] + + def start(self): + self.report_hook("starting (%s)" % ( + [sys.executable, self._filename]+self._get_args())) + self._pid = os.spawnv( + os.P_NOWAIT, + sys.executable, + [sys.executable, self._filename]+self._get_args()) + self.report_hook("waiting for startup") + self._wait_for_startup() + self.report_hook("running") + + def _wait_for_startup(self): + import socket + def connect(): + sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + sock.settimeout(1.0) + try: + sock.connect(('127.0.0.1', self.port)) + finally: + sock.close() + backoff(connect, (socket.error,)) + + def stop(self): + """Kill process (forcefully if necessary).""" + if os.name == 'nt': + kill_windows(self._pid, self.report_hook) + else: + kill_posix(self._pid, self.report_hook) + +def backoff(func, errors, + initial_timeout=1., hard_timeout=60., factor=1.2): + starttime = time.time() + timeout = initial_timeout + while time.time() < starttime + hard_timeout - 0.01: + try: + func() + except errors, exc: + time.sleep(timeout) + timeout *= factor + hard_limit = hard_timeout - (time.time() - starttime) + timeout = min(timeout, hard_limit) + else: + break + +def kill_windows(handle, report_hook): + try: + import win32api + except ImportError: + import ctypes + ctypes.windll.kernel32.TerminateProcess(int(handle), -1) + else: + win32api.TerminateProcess(int(handle), -1) + +def kill_posix(pid, report_hook): + import signal + os.kill(pid, signal.SIGTERM) + + timeout = 10. + starttime = time.time() + report_hook("waiting for exit") + def do_nothing(*args): + pass + old_handler = signal.signal(signal.SIGCHLD, do_nothing) + try: + while time.time() < starttime + timeout - 0.01: + pid, sts = os.waitpid(pid, os.WNOHANG) + if pid != 0: + # exited, or error + break + newtimeout = timeout - (time.time() - starttime) - 1. 
+ time.sleep(newtimeout) # wait for signal + else: + report_hook("forcefully killing") + try: + os.kill(pid, signal.SIGKILL) + except OSError, exc: + if exc.errno != errno.ECHILD: + raise + finally: + signal.signal(signal.SIGCHLD, old_handler) + +class TwistedServerProcess(ServerProcess): + + def __init__(self, name=None): + top_level_dir = os.path.dirname(os.path.abspath(sys.argv[0])) + path = os.path.join(top_level_dir, "test-tools/twisted-localserver.py") + ServerProcess.__init__(self, path, name) + + def _get_args(self): + return [str(self.port)] + + +class CgitbTextResult(_TextTestResult): + def _exc_info_to_string(self, err, test): + """Converts a sys.exc_info()-style tuple of values into a string.""" + exctype, value, tb = err + # Skip test runner traceback levels + while tb and self._is_relevant_tb_level(tb): + tb = tb.tb_next + if exctype is test.failureException: + # Skip assert*() traceback levels + length = self._count_relevant_tb_levels(tb) + return cgitb.text((exctype, value, tb)) + return cgitb.text((exctype, value, tb)) + +class CgitbTextTestRunner(TextTestRunner): + def _makeResult(self): + return CgitbTextResult(self.stream, self.descriptions, self.verbosity) + +def add_uri_attribute_to_test_cases(suite, uri): + for test in suite._tests: + if isinstance(test, TestCase): + test.uri = uri + else: + try: + add_uri_attribute_to_test_cases(test, uri) + except AttributeError: + pass + + +class TestProgram: + """A command-line program that runs a set of tests; this is primarily + for making test modules conveniently executable. + """ + USAGE = """\ +Usage: %(progName)s [options] [test] [...] + +Note not all the functional tests take note of the --uri argument yet -- +some currently always access the internet regardless of the --uri and +--run-local-server options. + +Options: + -l, --run-local-server + Run a local Twisted HTTP server for the functional + tests. You need Twisted installed for this to work. 
+ The server is run on the port given in the --uri + option. If --run-local-server is given but no --uri is + given, http://127.0.0.1:8000 is used as the base URI. + Also, if you're on Windows and don't have pywin32 or + ctypes installed, this option won't work, and you'll + have to start up test-tools/localserver.py manually. + --uri=URL Base URI for functional tests + (test.py does not access the network, unless you tell + it to run module functional_tests; + functional_tests.py does access the network) + e.g. --uri=http://127.0.0.1:8000/ + -h, --help Show this message + -v, --verbose Verbose output + -q, --quiet Minimal output + +The following options are only available through test.py (you can still run the +functional tests through test.py, just give 'functional_tests' as the module +name to run): + + -u Skip plain (non-doctest) unittests + -d Skip doctests + -c Run coverage (requires coverage.py, seems buggy) + -t Display tracebacks using cgitb's text mode + +""" + USAGE_EXAMPLES = """ +Examples: + %(progName)s + - run all tests + %(progName)s test_cookies + - run module 'test_cookies' + %(progName)s test_cookies.CookieTests + - run all 'test*' test methods in test_cookies.CookieTests + %(progName)s test_cookies.CookieTests.test_expires + - run test_cookies.CookieTests.test_expires + + %(progName)s functional_tests + - run the functional tests + %(progName)s -l functional_tests + - start a local Twisted HTTP server and run the functional + tests against that, rather than against SourceForge + (quicker!) 
+""" + def __init__(self, moduleNames, localServerProcess, defaultTest=None, + argv=None, testRunner=None, testLoader=defaultTestLoader, + defaultUri="http://wwwsearch.sf.net/", + usageExamples=USAGE_EXAMPLES, + ): + self.modules = [] + for moduleName in moduleNames: + module = __import__(moduleName) + for part in moduleName.split('.')[1:]: + module = getattr(module, part) + self.modules.append(module) + self.uri = None + self._defaultUri = defaultUri + if argv is None: + argv = sys.argv + self.verbosity = 1 + self.defaultTest = defaultTest + self.testRunner = testRunner + self.testLoader = testLoader + self.progName = os.path.basename(argv[0]) + self.usageExamples = usageExamples + self.runLocalServer = False + self.parseArgs(argv) + if self.runLocalServer: + import urllib + from mechanize._rfc3986 import urlsplit + authority = urlsplit(self.uri)[1] + host, port = urllib.splitport(authority) + if port is None: + port = "80" + try: + port = int(port) + except: + self.usageExit("port in --uri value must be an integer " + "(try --uri=http://127.0.0.1:8000/)") + self._serverProcess = localServerProcess + def report(msg): + print "%s: %s" % (localServerProcess.name, msg) + localServerProcess.port = port + localServerProcess.report_hook = report + + def usageExit(self, msg=None): + if msg: print msg + print (self.USAGE + self.usageExamples) % self.__dict__ + sys.exit(2) + + def parseArgs(self, argv): + import getopt + try: + options, args = getopt.getopt( + argv[1:], + 'hHvql', + ['help','verbose','quiet', 'uri=', 'run-local-server'], + ) + uri = None + for opt, value in options: + if opt in ('-h','-H','--help'): + self.usageExit() + if opt in ('--uri',): + uri = value + if opt in ('-q','--quiet'): + self.verbosity = 0 + if opt in ('-v','--verbose'): + self.verbosity = 2 + if opt in ('-l', '--run-local-server'): + self.runLocalServer = True + if uri is None: + if self.runLocalServer: + uri = "http://127.0.0.1:8000" + else: + uri = self._defaultUri + self.uri = uri + if 
len(args) == 0 and self.defaultTest is None: + suite = TestSuite() + for module in self.modules: + test = self.testLoader.loadTestsFromModule(module) + suite.addTest(test) + self.test = suite + add_uri_attribute_to_test_cases(self.test, self.uri) + return + if len(args) > 0: + self.testNames = args + else: + self.testNames = (self.defaultTest,) + self.createTests() + add_uri_attribute_to_test_cases(self.test, self.uri) + except getopt.error, msg: + self.usageExit(msg) + + def createTests(self): + self.test = self.testLoader.loadTestsFromNames(self.testNames) + + def runTests(self): + if self.testRunner is None: + self.testRunner = TextTestRunner(verbosity=self.verbosity) + + if self.runLocalServer: + self._serverProcess.start() + try: + result = self.testRunner.run(self.test) + finally: + if self.runLocalServer: + self._serverProcess.stop() + return result Added: wwwsearch/mechanize/trunk/test-tools/twisted-localserver.py ============================================================================== --- (empty file) +++ wwwsearch/mechanize/trunk/test-tools/twisted-localserver.py Mon May 28 23:06:56 2007 @@ -0,0 +1,91 @@ +#!/usr/bin/env python +""" +%prog port + +e.g. %prog 8000 + +Runs a local server to point the mechanize functional tests at. Example: + +python test-tools/twisted-localserver.py 8042 +python functional_tests.py --uri=http://localhost:8042/ + +You need Twisted XXX version to run it: + +XXX installation instructions +""" + +import os, sys, re, time +from twisted.web2 import server, http, resource, channel, \ + static, http_headers, responsecode + +from twisted.internet import reactor + +def html(title=None): + f = open("README.html", "r") + html = f.read() + if title is not None: + html = re.sub("(.*)", "%s" % title, html) + return html + +MECHANIZE_HTML = html() +ROOT_HTML = html("Python bits") +RELOAD_TEST_HTML = """\ + +Title + + +near the start + +

Now some data to prevent HEAD parsing from reading the link near +the end. + +

+%s
+ +near the end + + + +""" % (("0123456789ABCDEF"*4+"\n")*61) + + +class Page(resource.Resource): + + addSlash = True + content_type = http_headers.MimeType("text", "html") + + def render(self, ctx): + return http.Response( + responsecode.OK, + {"content-type": self.content_type}, + self.text) + +def make_page(root, name, text, + content_type="text/html"): + page = Page() + page.text = text + base_type, specific_type = content_type.split("/") + page.content_type = http_headers.MimeType(base_type, specific_type) + setattr(root, "child_"+name, page) + return page + +def main(): + root = Page() + root.text = ROOT_HTML + make_page(root, "mechanize", MECHANIZE_HTML) + bits = make_page(root, "robots.txt", + "User-Agent: *\nDisallow: /norobots", + "text/plain") + bits = make_page(root, "robots", "Hello, robots.", "text/plain") + bits = make_page(root, "norobots", "Hello, non-robots.", "text/plain") + bits = make_page(root, "bits", "GeneralFAQ.html") + make_page(bits, "cctest2.txt", + "Hello ClientCookie functional test suite.", + "text/plain") + make_page(bits, "mechanize_reload_test.html", RELOAD_TEST_HTML) + + site = server.Site(root) + reactor.listenTCP(int(sys.argv[1]), channel.HTTPFactory(site)) + reactor.run() + +main() Modified: wwwsearch/mechanize/trunk/test.py ============================================================================== --- wwwsearch/mechanize/trunk/test.py (original) +++ wwwsearch/mechanize/trunk/test.py Mon May 28 23:06:56 2007 @@ -8,9 +8,6 @@ """ -import cgitb -#cgitb.enable(format="text") - # Modules containing tests to run -- a test is anything named *Tests, which # should be classes deriving from unittest.TestCase. 
MODULE_NAMES = ["test_date", "test_browser", "test_response", "test_cookies", @@ -18,9 +15,7 @@ "test_useragent", "test_html", "test_opener", ] -import sys, os, traceback, logging, glob -from unittest import defaultTestLoader, TextTestRunner, TestSuite, TestCase, \ - _TextTestResult +import sys, os, logging, glob #level = logging.DEBUG #level = logging.INFO @@ -30,109 +25,12 @@ #logging.getLogger("mechanize").addHandler(logging.StreamHandler(sys.stdout)) -class CgitbTextResult(_TextTestResult): - def _exc_info_to_string(self, err, test): - """Converts a sys.exc_info()-style tuple of values into a string.""" - exctype, value, tb = err - # Skip test runner traceback levels - while tb and self._is_relevant_tb_level(tb): - tb = tb.tb_next - if exctype is test.failureException: - # Skip assert*() traceback levels - length = self._count_relevant_tb_levels(tb) - return cgitb.text((exctype, value, tb)) - return cgitb.text((exctype, value, tb)) - -class CgitbTextTestRunner(TextTestRunner): - def _makeResult(self): - return CgitbTextResult(self.stream, self.descriptions, self.verbosity) - - -class TestProgram: - """A command-line program that runs a set of tests; this is primarily - for making test modules conveniently executable. - """ - USAGE = """\ -Usage: %(progName)s [options] [test] [...] 
- -Options: - -h, --help Show this message - -v, --verbose Verbose output - -q, --quiet Minimal output - -Examples: - %(progName)s - run default set of tests - %(progName)s MyTestSuite - run suite 'MyTestSuite' - %(progName)s MyTestCase.testSomething - run MyTestCase.testSomething - %(progName)s MyTestCase - run all 'test*' test methods - in MyTestCase -""" - def __init__(self, moduleNames, defaultTest=None, - argv=None, testRunner=None, testLoader=defaultTestLoader): - self.modules = [] - for moduleName in moduleNames: - module = __import__(moduleName) - for part in moduleName.split('.')[1:]: - module = getattr(module, part) - self.modules.append(module) - if argv is None: - argv = sys.argv - self.verbosity = 1 - self.defaultTest = defaultTest - self.testRunner = testRunner - self.testLoader = testLoader - self.progName = os.path.basename(argv[0]) - self.parseArgs(argv) - - def usageExit(self, msg=None): - if msg: print msg - print self.USAGE % self.__dict__ - sys.exit(2) - - def parseArgs(self, argv): - import getopt - try: - options, args = getopt.getopt(argv[1:], 'hHvq', - ['help','verbose','quiet']) - for opt, value in options: - if opt in ('-h','-H','--help'): - self.usageExit() - if opt in ('-q','--quiet'): - self.verbosity = 0 - if opt in ('-v','--verbose'): - self.verbosity = 2 - if len(args) == 0 and self.defaultTest is None: - suite = TestSuite() - for module in self.modules: - test = self.testLoader.loadTestsFromModule(module) - suite.addTest(test) - self.test = suite - return - if len(args) > 0: - self.testNames = args - else: - self.testNames = (self.defaultTest,) - self.createTests() - except getopt.error, msg: - self.usageExit(msg) - - def createTests(self): - self.test = self.testLoader.loadTestsFromNames(self.testNames) - - def runTests(self): - if self.testRunner is None: - self.testRunner = TextTestRunner(verbosity=self.verbosity) - result = self.testRunner.run(self.test) - return result - - if __name__ == "__main__": -## sys.path.insert(0, 
'/home/john/comp/dev/rl/jjlee/lib/python') -## import jjl -## import __builtin__ -## __builtin__.jjl = jjl + # XXX + # temporary stop-gap to run doctests &c. + # should switch to nose or something - # XXX temporary stop-gap to run doctests + top_level_dir = os.path.dirname(os.path.abspath(sys.argv[0])) # XXXX coverage output seems incorrect ATM run_coverage = "-c" in sys.argv @@ -159,6 +57,7 @@ # that renamed module. sys.path.insert(0, "test-tools") import doctest + import testprogram import coverage if run_coverage: @@ -176,7 +75,8 @@ if run_doctests: # run .doctest files needing special support common_globs = {"mechanize": mechanize} - pm_doctest_filename = os.path.join("test", "test_password_manager.doctest") + pm_doctest_filename = os.path.join( + "test", "test_password_manager.doctest") for globs in [ {"mgr_class": mechanize.HTTPPasswordMgr}, {"mgr_class": mechanize.HTTPProxyPasswordMgr}, @@ -217,8 +117,12 @@ sys.path.insert(0, test_path) test_runner = None if use_cgitb: - test_runner = CgitbTextTestRunner() - prog = TestProgram(MODULE_NAMES, testRunner=test_runner) + test_runner = testprogram.CgitbTextTestRunner() + prog = testprogram.TestProgram( + MODULE_NAMES, + testRunner=test_runner, + localServerProcess=testprogram.TwistedServerProcess(), + ) result = prog.runTests() if run_coverage: From jjlee at codespeak.net Thu May 31 00:37:14 2007 From: jjlee at codespeak.net (jjlee at codespeak.net) Date: Thu, 31 May 2007 00:37:14 +0200 (CEST) Subject: [wwwsearch-commits] r43927 - in wwwsearch/mechanize/trunk: . test-tools Message-ID: <20070530223714.0F1738080@code0.codespeak.net> Author: jjlee Date: Thu May 31 00:37:13 2007 New Revision: 43927 Modified: wwwsearch/mechanize/trunk/functional_tests.py wwwsearch/mechanize/trunk/test-tools/twisted-localserver.py Log: A bit more work on local twisted server for functional tests: Add 302 redirection page to get rid of one of the hardcoded wwwsearch URLs. Fix some issues in local twisted server. 
Modified: wwwsearch/mechanize/trunk/functional_tests.py ============================================================================== --- wwwsearch/mechanize/trunk/functional_tests.py (original) +++ wwwsearch/mechanize/trunk/functional_tests.py Thu May 31 00:37:13 2007 @@ -57,13 +57,13 @@ self.assertEqual(self.browser.title(), 'mechanize') def test_302_and_404(self): - # the combination of 302 (caused by use of "sf.net") and 404 has caused - # problems in the past due to accidental double-wrapping of the error - # response + # the combination of 302 and 404 (/redirected is configured to redirect + # to a non-existent URL /nonexistent) has caused problems in the past + # due to accidental double-wrapping of the error response import urllib2 self.assertRaises( urllib2.HTTPError, - self.browser.open, "http://wwwsearch.sf.net/doesnotexist", + self.browser.open, urljoin(self.uri, "/redirected"), ) def test_reread(self): Modified: wwwsearch/mechanize/trunk/test-tools/twisted-localserver.py ============================================================================== --- wwwsearch/mechanize/trunk/test-tools/twisted-localserver.py (original) +++ wwwsearch/mechanize/trunk/test-tools/twisted-localserver.py Thu May 31 00:37:13 2007 @@ -60,29 +60,45 @@ {"content-type": self.content_type}, self.text) -def make_page(root, name, text, - content_type="text/html"): +def _make_page(root, name, text, + content_type="text/html", + leaf=False): page = Page() page.text = text base_type, specific_type = content_type.split("/") page.content_type = http_headers.MimeType(base_type, specific_type) + page.addSlash = not leaf setattr(root, "child_"+name, page) return page +def make_page(root, name, text, + content_type="text/html"): + return _make_page(root, name, text, content_type, leaf=False) + +def make_leaf_page(root, name, text, + content_type="text/html"): + return _make_page(root, name, text, content_type, leaf=True) + +def make_redirect(root, name, location_relative_ref): + redirect 
= resource.RedirectResource(path=location_relative_ref) + setattr(root, "child_"+name, redirect) + return redirect + def main(): root = Page() root.text = ROOT_HTML make_page(root, "mechanize", MECHANIZE_HTML) - bits = make_page(root, "robots.txt", - "User-Agent: *\nDisallow: /norobots", - "text/plain") - bits = make_page(root, "robots", "Hello, robots.", "text/plain") - bits = make_page(root, "norobots", "Hello, non-robots.", "text/plain") + make_leaf_page(root, "robots.txt", + "User-Agent: *\nDisallow: /norobots", + "text/plain") + make_leaf_page(root, "robots", "Hello, robots.", "text/plain") + make_leaf_page(root, "norobots", "Hello, non-robots.", "text/plain") bits = make_page(root, "bits", "GeneralFAQ.html") - make_page(bits, "cctest2.txt", - "Hello ClientCookie functional test suite.", - "text/plain") - make_page(bits, "mechanize_reload_test.html", RELOAD_TEST_HTML) + make_leaf_page(bits, "cctest2.txt", + "Hello ClientCookie functional test suite.", + "text/plain") + make_leaf_page(bits, "mechanize_reload_test.html", RELOAD_TEST_HTML) + make_redirect(root, "redirected", "/doesnotexist") site = server.Site(root) reactor.listenTCP(int(sys.argv[1]), channel.HTTPFactory(site)) From jjlee at codespeak.net Thu May 31 01:13:21 2007 From: jjlee at codespeak.net (jjlee at codespeak.net) Date: Thu, 31 May 2007 01:13:21 +0200 (CEST) Subject: [wwwsearch-commits] r43928 - in wwwsearch/mechanize/trunk: . 
test-tools Message-ID: <20070530231321.640438093@code0.codespeak.net> Author: jjlee Date: Thu May 31 01:13:20 2007 New Revision: 43928 Modified: wwwsearch/mechanize/trunk/functional_tests.py wwwsearch/mechanize/trunk/test-tools/twisted-localserver.py Log: Get rid of the remaining hard-coded URLs in functional tests :-) Modified: wwwsearch/mechanize/trunk/functional_tests.py ============================================================================== --- wwwsearch/mechanize/trunk/functional_tests.py (original) +++ wwwsearch/mechanize/trunk/functional_tests.py Thu May 31 01:13:20 2007 @@ -256,8 +256,8 @@ def test_referer(self): br = mechanize.Browser() - referer = "http://wwwsearch.sourceforge.net/bits/referertest.html" - info = "http://wwwsearch.sourceforge.net/cgi-bin/cookietest.cgi" + referer = urljoin(self.uri, "bits/referertest.html") + info = urljoin(self.uri, "/cgi-bin/cookietest.cgi") r = br.open(info) self.assert_(referer not in r.get_data()) @@ -286,8 +286,7 @@ try: install_opener(o) try: - r = urlopen( - "http://wwwsearch.sourceforge.net/cgi-bin/cookietest.cgi") + r = urlopen(urljoin(self.uri, "/cgi-bin/cookietest.cgi")) except urllib2.URLError, e: #print e.read() raise @@ -317,7 +316,7 @@ opener.open, urljoin(self.uri, "norobots")) def test_urlretrieve(self): - url = "http://www.python.org/" + url = urljoin(self.uri, "/mechanize/") test_filename = "python.html" def check_retrieve(opener, filename, headers): self.assertEqual(headers.get('Content-Type'), 'text/html') Modified: wwwsearch/mechanize/trunk/test-tools/twisted-localserver.py ============================================================================== --- wwwsearch/mechanize/trunk/test-tools/twisted-localserver.py (original) +++ wwwsearch/mechanize/trunk/test-tools/twisted-localserver.py Thu May 31 01:13:20 2007 @@ -16,7 +16,7 @@ import os, sys, re, time from twisted.web2 import server, http, resource, channel, \ - static, http_headers, responsecode + static, http_headers, responsecode, twcgi 
from twisted.internet import reactor @@ -47,6 +47,16 @@ """ % (("0123456789ABCDEF"*4+"\n")*61) +REFERER_TEST_HTML = """\ + + +mechanize Referer (sic) test page + + +

This page exists to test the Referer functionality of mechanize. +

Here is a link to a page that displays the Referer header. + +""" class Page(resource.Resource): @@ -84,6 +94,11 @@ setattr(root, "child_"+name, redirect) return redirect +def make_cgi_bin(parent, name, dir_name): + cgi_bin = twcgi.CGIDirectory(dir_name) + setattr(parent, "child_"+name, cgi_bin) + return cgi_bin + def main(): root = Page() root.text = ROOT_HTML @@ -97,8 +112,10 @@ make_leaf_page(bits, "cctest2.txt", "Hello ClientCookie functional test suite.", "text/plain") + make_leaf_page(bits, "referertest.html", REFERER_TEST_HTML) make_leaf_page(bits, "mechanize_reload_test.html", RELOAD_TEST_HTML) make_redirect(root, "redirected", "/doesnotexist") + make_cgi_bin(root, "cgi-bin", "examples") site = server.Site(root) reactor.listenTCP(int(sys.argv[1]), channel.HTTPFactory(site)) From jjlee at codespeak.net Thu May 31 01:23:24 2007 From: jjlee at codespeak.net (jjlee at codespeak.net) Date: Thu, 31 May 2007 01:23:24 +0200 (CEST) Subject: [wwwsearch-commits] r43929 - wwwsearch/mechanize/trunk/test-tools Message-ID: <20070530232324.6F72B8093@code0.codespeak.net> Author: jjlee Date: Thu May 31 01:23:19 2007 New Revision: 43929 Modified: wwwsearch/mechanize/trunk/test-tools/twisted-localserver.py Log: Minor refactoring Modified: wwwsearch/mechanize/trunk/test-tools/twisted-localserver.py ============================================================================== --- wwwsearch/mechanize/trunk/test-tools/twisted-localserver.py (original) +++ wwwsearch/mechanize/trunk/test-tools/twisted-localserver.py Thu May 31 01:23:19 2007 @@ -70,7 +70,7 @@ {"content-type": self.content_type}, self.text) -def _make_page(root, name, text, +def _make_page(parent, name, text, content_type="text/html", leaf=False): page = Page() @@ -78,20 +78,20 @@ base_type, specific_type = content_type.split("/") page.content_type = http_headers.MimeType(base_type, specific_type) page.addSlash = not leaf - setattr(root, "child_"+name, page) + setattr(parent, "child_"+name, page) return page -def 
make_page(root, name, text, +def make_page(parent, name, text, content_type="text/html"): - return _make_page(root, name, text, content_type, leaf=False) + return _make_page(parent, name, text, content_type, leaf=False) -def make_leaf_page(root, name, text, +def make_leaf_page(parent, name, text, content_type="text/html"): - return _make_page(root, name, text, content_type, leaf=True) + return _make_page(parent, name, text, content_type, leaf=True) -def make_redirect(root, name, location_relative_ref): +def make_redirect(parent, name, location_relative_ref): redirect = resource.RedirectResource(path=location_relative_ref) - setattr(root, "child_"+name, redirect) + setattr(parent, "child_"+name, redirect) return redirect def make_cgi_bin(parent, name, dir_name): From jjlee at codespeak.net Thu May 31 01:26:13 2007 From: jjlee at codespeak.net (jjlee at codespeak.net) Date: Thu, 31 May 2007 01:26:13 +0200 (CEST) Subject: [wwwsearch-commits] r43930 - in wwwsearch/mechanize/trunk: examples test-tools Message-ID: <20070530232613.63DBC8093@code0.codespeak.net> Author: jjlee Date: Thu May 31 01:26:12 2007 New Revision: 43930 Added: wwwsearch/mechanize/trunk/test-tools/cookietest.cgi - copied, changed from r43808, wwwsearch/mechanize/trunk/examples/cookietest.cgi Removed: wwwsearch/mechanize/trunk/examples/cookietest.cgi Modified: wwwsearch/mechanize/trunk/test-tools/twisted-localserver.py Log: Move cookietest.cgi from examples into test-tools Deleted: /wwwsearch/mechanize/trunk/examples/cookietest.cgi ============================================================================== --- /wwwsearch/mechanize/trunk/examples/cookietest.cgi Thu May 31 01:26:12 2007 +++ (empty file) @@ -1,43 +0,0 @@ -#!/usr/bin/python -# -*-python-*- - -# The copy of this script that lives at wwwsearch.sf.net is used by the -# mechanize functional tests. 
- -print "Content-Type: text/html" -print "Set-Cookie: foo=bar\n" -import sys, os, string, cgi, Cookie - -from types import ListType - -print "Cookies and form submission parameters" -cookie = Cookie.SimpleCookie() -cookieHdr = os.environ.get("HTTP_COOKIE", "") -cookie.load(cookieHdr) -if not cookie.has_key("foo"): - print '' -print "" -print "

Received cookies:

" -print "
"
-print cgi.escape(os.environ.get("HTTP_COOKIE", ""))
-print "
" -if cookie.has_key("foo"): - print "Your browser supports cookies!" -print "

Referer:

" -print "
"
-print cgi.escape(os.environ.get("HTTP_REFERER", ""))
-print "
" -form = cgi.FieldStorage() -print "

Received parameters:

" -print "
"
-for k in form.keys():
-    v = form[k]
-    if isinstance(v, ListType):
-        vs = []
-        for item in v:
-            vs.append(item.value)
-        text = string.join(vs, ", ")
-    else:
-        text = v.value
-    print "%s: %s" % (cgi.escape(k), cgi.escape(text))
-print "
" Copied: wwwsearch/mechanize/trunk/test-tools/cookietest.cgi (from r43808, wwwsearch/mechanize/trunk/examples/cookietest.cgi) ============================================================================== --- wwwsearch/mechanize/trunk/examples/cookietest.cgi (original) +++ wwwsearch/mechanize/trunk/test-tools/cookietest.cgi Thu May 31 01:26:12 2007 @@ -1,8 +1,7 @@ #!/usr/bin/python # -*-python-*- -# The copy of this script that lives at wwwsearch.sf.net is used by the -# mechanize functional tests. +# This is used by functional_tests.py print "Content-Type: text/html" print "Set-Cookie: foo=bar\n" Modified: wwwsearch/mechanize/trunk/test-tools/twisted-localserver.py ============================================================================== --- wwwsearch/mechanize/trunk/test-tools/twisted-localserver.py (original) +++ wwwsearch/mechanize/trunk/test-tools/twisted-localserver.py Thu May 31 01:26:12 2007 @@ -115,7 +115,7 @@ make_leaf_page(bits, "referertest.html", REFERER_TEST_HTML) make_leaf_page(bits, "mechanize_reload_test.html", RELOAD_TEST_HTML) make_redirect(root, "redirected", "/doesnotexist") - make_cgi_bin(root, "cgi-bin", "examples") + make_cgi_bin(root, "cgi-bin", "test-tools") site = server.Site(root) reactor.listenTCP(int(sys.argv[1]), channel.HTTPFactory(site)) From jjlee at codespeak.net Thu May 31 01:29:50 2007 From: jjlee at codespeak.net (jjlee at codespeak.net) Date: Thu, 31 May 2007 01:29:50 +0200 (CEST) Subject: [wwwsearch-commits] r43931 - wwwsearch/mechanize/trunk/test-tools Message-ID: <20070530232950.DDFA88097@code0.codespeak.net> Author: jjlee Date: Thu May 31 01:29:50 2007 New Revision: 43931 Modified: wwwsearch/mechanize/trunk/test-tools/twisted-localserver.py Log: Clean up local server imports Modified: wwwsearch/mechanize/trunk/test-tools/twisted-localserver.py ============================================================================== --- wwwsearch/mechanize/trunk/test-tools/twisted-localserver.py (original) +++ 
wwwsearch/mechanize/trunk/test-tools/twisted-localserver.py Thu May 31 01:29:50 2007 @@ -14,10 +14,10 @@ XXX installation instructions """ -import os, sys, re, time -from twisted.web2 import server, http, resource, channel, \ - static, http_headers, responsecode, twcgi +import sys, re +from twisted.web2 import server, http, resource, channel, \ + http_headers, responsecode, twcgi from twisted.internet import reactor def html(title=None): From jjlee at codespeak.net Thu May 31 01:36:49 2007 From: jjlee at codespeak.net (jjlee at codespeak.net) Date: Thu, 31 May 2007 01:36:49 +0200 (CEST) Subject: [wwwsearch-commits] r43932 - in wwwsearch/mechanize/trunk: . mechanize Message-ID: <20070530233649.1DC098097@code0.codespeak.net> Author: jjlee Date: Thu May 31 01:36:48 2007 New Revision: 43932 Modified: wwwsearch/mechanize/trunk/README.html.in wwwsearch/mechanize/trunk/mechanize/_mechanize.py wwwsearch/mechanize/trunk/setup.py Log: Update version Modified: wwwsearch/mechanize/trunk/README.html.in ============================================================================== --- wwwsearch/mechanize/trunk/README.html.in (original) +++ wwwsearch/mechanize/trunk/README.html.in Thu May 31 01:36:48 2007 @@ -435,7 +435,7 @@

Development release.

    -@{version = "0.1.6b"} +@{version = "0.1.7b"}
  • mechanize-@(version).tar.gz
  • mechanize-@(version).zip
  • Change Log (included in distribution) Modified: wwwsearch/mechanize/trunk/mechanize/_mechanize.py ============================================================================== --- wwwsearch/mechanize/trunk/mechanize/_mechanize.py (original) +++ wwwsearch/mechanize/trunk/mechanize/_mechanize.py Thu May 31 01:36:48 2007 @@ -17,7 +17,7 @@ import _request import _rfc3986 -__version__ = (0, 1, 6, "b", None) # 0.1.6b +__version__ = (0, 1, 7, "b", None) # 0.1.7b class BrowserStateError(Exception): pass class LinkNotFoundError(Exception): pass Modified: wwwsearch/mechanize/trunk/setup.py ============================================================================== --- wwwsearch/mechanize/trunk/setup.py (original) +++ wwwsearch/mechanize/trunk/setup.py Thu May 31 01:36:48 2007 @@ -52,7 +52,7 @@ ## VERSION_MATCH = re.search(r'__version__ = \((.*)\)', ## open("mechanize/_mechanize.py").read()) ## VERSION = unparse_version(str_to_tuple(VERSION_MATCH.group(1))) -VERSION = "0.1.6b" +VERSION = "0.1.7b" INSTALL_REQUIRES = ["ClientForm>=0.2.6, ==dev"] NAME = "mechanize" PACKAGE = True From jjlee at codespeak.net Thu May 31 01:53:45 2007 From: jjlee at codespeak.net (jjlee at codespeak.net) Date: Thu, 31 May 2007 01:53:45 +0200 (CEST) Subject: [wwwsearch-commits] r43933 - wwwsearch/mechanize/common Message-ID: <20070530235345.A1F638093@code0.codespeak.net> Author: jjlee Date: Thu May 31 01:53:45 2007 New Revision: 43933 Modified: wwwsearch/mechanize/common/ChangeLog Log: Update ChangeLog Modified: wwwsearch/mechanize/common/ChangeLog ============================================================================== --- wwwsearch/mechanize/common/ChangeLog (original) +++ wwwsearch/mechanize/common/ChangeLog Thu May 31 01:53:45 2007 @@ -1,6 +1,43 @@ This isn't really in proper GNU ChangeLog format, it just happens to look that way. +2007-05-31 John J Lee + * 0.1.7b release. + * Sub-requests should not usually be visiting, so make it so. 
In + fact the visible behaviour wasn't really broken here, since + .back() skips over None responses (which is odd in itself, but + won't be changed until after stable release is branched). + However, this patch does change visible behaviour in that it + creates new Request objects for sub-requests (e.g. basic auth + retries) where previously we just mutated the existing Request + object. + * Changes to sort out abuse of by SeekableProcessor and + ResponseUpgradeProcessor (latter shouldn't have been public in + the first place) and resulting confusing / unclear / broken + behaviour. Deprecate SeekableProcessor and + ResponseUpgradeProcessor. Add SeekableResponseOpener. Remove + SeekableProcessor and ResponseUpgradeProcessor from Browser. + Move UserAgentBase.add_referer_header() to Browser (it was on by + default, breaking UserAgent, and should never really have been + there). + * Fix HTTP proxy support: r29110 meant that Request.get_selector() + didn't take into account the change to .__r_host + (Thanks tgates at ...). + * Redirected robots.txt fetch no longer results in another + attempted robots.txt fetch to check the redirection is allowed! + * Fix exception raised by RFC 3986 implementation with + urljoin(base, '/..') + * Fix two multiple-response-wrapping bugs. + * Add missing import in tests (caused failure on Windows). + * Set svn:eol-style to native for all text files in SVN. + * Add some tests for upgrade_response(). + * Add a functional test for 302 + 404 case. + * Add an -l option to run the functional tests against a local + twisted.web2-based server (you need Twisted installed for this + to work). This is much faster than running against + wwwsearch.sourceforge.net + * Add -u switch to skip unittests (and only run the doctests). 
+ 2007-01-07 John J Lee * 0.1.6b release From jjlee at codespeak.net Thu May 31 01:54:35 2007 From: jjlee at codespeak.net (jjlee at codespeak.net) Date: Thu, 31 May 2007 01:54:35 +0200 (CEST) Subject: [wwwsearch-commits] r43934 - wwwsearch/mechanize/tag/release/0.1.7b-2007-05-31T00:55:15 Message-ID: <20070530235435.4972A8099@code0.codespeak.net> Author: jjlee Date: Thu May 31 01:54:34 2007 New Revision: 43934 Added: wwwsearch/mechanize/tag/release/0.1.7b-2007-05-31T00:55:15/ - copied from r43933, wwwsearch/mechanize/trunk/ Modified: wwwsearch/mechanize/tag/release/0.1.7b-2007-05-31T00:55:15/setup.cfg Log: Tagged mechanize (trunk -r HEAD, from working copy) release 0.1.7b-2007-05-31T00:55:15 Modified: wwwsearch/mechanize/tag/release/0.1.7b-2007-05-31T00:55:15/setup.cfg ============================================================================== --- wwwsearch/mechanize/trunk/setup.cfg (original) +++ wwwsearch/mechanize/tag/release/0.1.7b-2007-05-31T00:55:15/setup.cfg Thu May 31 01:54:34 2007 @@ -1,3 +0,0 @@ -[egg_info] -tag_build = .dev -tag_svn_revision = 1 From jjlee at codespeak.net Thu May 31 02:04:54 2007 From: jjlee at codespeak.net (jjlee at codespeak.net) Date: Thu, 31 May 2007 02:04:54 +0200 (CEST) Subject: [wwwsearch-commits] r43935 - in wwwsearch/mechanize/trunk: . mechanize Message-ID: <20070531000454.CDD5D8093@code0.codespeak.net> Author: jjlee Date: Thu May 31 02:04:54 2007 New Revision: 43935 Modified: wwwsearch/mechanize/trunk/README.html.in wwwsearch/mechanize/trunk/mechanize/_mechanize.py wwwsearch/mechanize/trunk/setup.py Log: Update version Modified: wwwsearch/mechanize/trunk/README.html.in ============================================================================== --- wwwsearch/mechanize/trunk/README.html.in (original) +++ wwwsearch/mechanize/trunk/README.html.in Thu May 31 02:04:54 2007 @@ -435,7 +435,7 @@

    Development release.

      -@{version = "0.1.7b"} +@{version = "0.1.8b"}
    • mechanize-@(version).tar.gz
    • mechanize-@(version).zip
    • Change Log (included in distribution) Modified: wwwsearch/mechanize/trunk/mechanize/_mechanize.py ============================================================================== --- wwwsearch/mechanize/trunk/mechanize/_mechanize.py (original) +++ wwwsearch/mechanize/trunk/mechanize/_mechanize.py Thu May 31 02:04:54 2007 @@ -17,7 +17,7 @@ import _request import _rfc3986 -__version__ = (0, 1, 7, "b", None) # 0.1.7b +__version__ = (0, 1, 8, "b", None) # 0.1.8b class BrowserStateError(Exception): pass class LinkNotFoundError(Exception): pass Modified: wwwsearch/mechanize/trunk/setup.py ============================================================================== --- wwwsearch/mechanize/trunk/setup.py (original) +++ wwwsearch/mechanize/trunk/setup.py Thu May 31 02:04:54 2007 @@ -52,7 +52,7 @@ ## VERSION_MATCH = re.search(r'__version__ = \((.*)\)', ## open("mechanize/_mechanize.py").read()) ## VERSION = unparse_version(str_to_tuple(VERSION_MATCH.group(1))) -VERSION = "0.1.7b" +VERSION = "0.1.8b" INSTALL_REQUIRES = ["ClientForm>=0.2.6, ==dev"] NAME = "mechanize" PACKAGE = True From jjlee at codespeak.net Thu May 31 02:12:29 2007 From: jjlee at codespeak.net (jjlee at codespeak.net) Date: Thu, 31 May 2007 02:12:29 +0200 (CEST) Subject: [wwwsearch-commits] r43936 - wwwsearch/mechanize/trunk/mechanize Message-ID: <20070531001229.62E668093@code0.codespeak.net> Author: jjlee Date: Thu May 31 02:12:29 2007 New Revision: 43936 Modified: wwwsearch/mechanize/trunk/mechanize/__init__.py wwwsearch/mechanize/trunk/mechanize/_urllib2.py wwwsearch/mechanize/trunk/mechanize/_useragent.py Log: Remove gopher support, since Python 2.6 no longer supports it (titus at caltech.edu) Modified: wwwsearch/mechanize/trunk/mechanize/__init__.py ============================================================================== --- wwwsearch/mechanize/trunk/mechanize/__init__.py (original) +++ wwwsearch/mechanize/trunk/mechanize/__init__.py Thu May 31 02:12:29 2007 @@ -17,8 +17,6 @@ 'FileHandler', 
'FormNotFoundError', 'FormsFactory', - 'GopherError', - 'GopherHandler', 'HTTPBasicAuthHandler', 'HTTPCookieProcessor', 'HTTPDefaultErrorHandler', Modified: wwwsearch/mechanize/trunk/mechanize/_urllib2.py ============================================================================== --- wwwsearch/mechanize/trunk/mechanize/_urllib2.py (original) +++ wwwsearch/mechanize/trunk/mechanize/_urllib2.py Thu May 31 02:12:29 2007 @@ -2,8 +2,7 @@ # ...from urllib2... from urllib2 import \ URLError, \ - HTTPError, \ - GopherError + HTTPError # ...and from mechanize from _opener import OpenerDirector, \ SeekableResponseOpener, \ @@ -32,8 +31,7 @@ UnknownHandler, \ FTPHandler, \ CacheFTPHandler, \ - FileHandler, \ - GopherHandler + FileHandler # ...and from mechanize from _http import \ HTTPHandler, \ Modified: wwwsearch/mechanize/trunk/mechanize/_useragent.py ============================================================================== --- wwwsearch/mechanize/trunk/mechanize/_useragent.py (original) +++ wwwsearch/mechanize/trunk/mechanize/_useragent.py Thu May 31 02:12:29 2007 @@ -50,7 +50,6 @@ # CacheFTPHandler is buggy, at least in 2.3, so we don't use it "ftp": _urllib2.FTPHandler, "file": _urllib2.FileHandler, - "gopher": _urllib2.GopherHandler, # other handlers "_unknown": _urllib2.UnknownHandler, @@ -77,7 +76,7 @@ "_debug_response_body": _urllib2.HTTPResponseDebugProcessor, } - default_schemes = ["http", "ftp", "file", "gopher"] + default_schemes = ["http", "ftp", "file"] default_others = ["_unknown", "_http_error", "_http_request_upgrade", "_http_default_error", ] From jjlee at codespeak.net Sat Jun 2 14:18:50 2007 From: jjlee at codespeak.net (jjlee at codespeak.net) Date: Sat, 2 Jun 2007 14:18:50 +0200 (CEST) Subject: [wwwsearch-commits] r43990 - wwwsearch/mechanize/trunk/test Message-ID: <20070602121850.A622480B2@code0.codespeak.net> Author: jjlee Date: Sat Jun 2 14:18:49 2007 New Revision: 43990 Modified: wwwsearch/mechanize/trunk/test/test_useragent.py Log: Fix 
test failure Modified: wwwsearch/mechanize/trunk/test/test_useragent.py ============================================================================== --- wwwsearch/mechanize/trunk/test/test_useragent.py (original) +++ wwwsearch/mechanize/trunk/test/test_useragent.py Sat Jun 2 14:18:49 2007 @@ -24,7 +24,7 @@ {"blah": BlahHandler, "_blah": BlahProcessor}) ua = TestUserAgent() - self.assertEqual(len(ua.handlers), 5) + self.assertEqual(len(ua.handlers), 4) ua.set_handled_schemes(["http", "https"]) self.assertEqual(len(ua.handlers), 2) self.assertRaises(ValueError, From jjlee at codespeak.net Sat Jun 2 14:23:54 2007 From: jjlee at codespeak.net (jjlee at codespeak.net) Date: Sat, 2 Jun 2007 14:23:54 +0200 (CEST) Subject: [wwwsearch-commits] r43991 - in wwwsearch/mechanize/trunk: . mechanize Message-ID: <20070602122354.035B080B2@code0.codespeak.net> Author: jjlee Date: Sat Jun 2 14:23:54 2007 New Revision: 43991 Modified: wwwsearch/mechanize/trunk/functional_tests.py wwwsearch/mechanize/trunk/mechanize/_mechanize.py Log: Add convenience method Browser.open_local_file(filename) Modified: wwwsearch/mechanize/trunk/functional_tests.py ============================================================================== --- wwwsearch/mechanize/trunk/functional_tests.py (original) +++ wwwsearch/mechanize/trunk/functional_tests.py Sat Jun 2 14:23:54 2007 @@ -96,6 +96,14 @@ r = self.browser.open(url) self.assert_("this string appears in this file ;-)" in r.read()) + def test_open_local_file(self): + # Since the file: URL scheme is not well standardised, Browser has a + # special method to open files by name, for convenience: + br = mechanize.Browser() + response = br.open_local_file("mechanize/_mechanize.py") + self.assert_("def open_local_file(self, filename):" in + response.get_data()) + def test_open_novisit(self): def test_state(br): self.assert_(br.request is None) Modified: wwwsearch/mechanize/trunk/mechanize/_mechanize.py 
============================================================================== --- wwwsearch/mechanize/trunk/mechanize/_mechanize.py (original) +++ wwwsearch/mechanize/trunk/mechanize/_mechanize.py Sat Jun 2 14:23:54 2007 @@ -9,7 +9,8 @@ """ -import urllib2, sys, copy, re +import urllib2, sys, copy, re, os, urllib + from _useragent import UserAgentBase from _html import DefaultFactory @@ -24,6 +25,14 @@ class FormNotFoundError(Exception): pass +def sanepathname2url(path): + urlpath = urllib.pathname2url(path) + if os.name == "nt" and urlpath.startswith("///"): + urlpath = urlpath[2:] + # XXX don't ask me about the mac... + return urlpath + + class History: """ @@ -275,6 +284,11 @@ """ return copy.copy(self._response) + def open_local_file(self, filename): + path = sanepathname2url(os.path.abspath(filename)) + url = 'file://'+path + return self.open(url) + def set_response(self, response): """Replace current response with (a copy of) response. From jjlee at codespeak.net Sat Jun 9 18:01:37 2007 From: jjlee at codespeak.net (jjlee at codespeak.net) Date: Sat, 9 Jun 2007 18:01:37 +0200 (CEST) Subject: [wwwsearch-commits] r44118 - wwwsearch/mechanize/trunk/mechanize Message-ID: <20070609160137.166E980CA@code0.codespeak.net> Author: jjlee Date: Sat Jun 9 18:01:37 2007 New Revision: 44118 Modified: wwwsearch/mechanize/trunk/mechanize/_beautifulsoup.py Log: Make bundled BeautifulSoup 2 emacs syntax highlighting-friendly Modified: wwwsearch/mechanize/trunk/mechanize/_beautifulsoup.py ============================================================================== --- wwwsearch/mechanize/trunk/mechanize/_beautifulsoup.py (original) +++ wwwsearch/mechanize/trunk/mechanize/_beautifulsoup.py Sat Jun 9 18:01:37 2007 @@ -37,7 +37,7 @@ parsing strategy specific to an XML schema or a particular bizarre HTML document. Typically your subclass would just override SELF_CLOSING_TAGS and/or NESTABLE_TAGS. 
-""" +""" #" from __future__ import generators __author__ = "Leonard Richardson (leonardr at segfault.org)" From jjlee at codespeak.net Sat Jun 9 18:49:18 2007 From: jjlee at codespeak.net (jjlee at codespeak.net) Date: Sat, 9 Jun 2007 18:49:18 +0200 (CEST) Subject: [wwwsearch-commits] r44119 - in wwwsearch/mechanize/trunk: mechanize test Message-ID: <20070609164918.DB7CA80CB@code0.codespeak.net> Author: jjlee Date: Sat Jun 9 18:49:18 2007 New Revision: 44119 Modified: wwwsearch/mechanize/trunk/mechanize/_mechanize.py wwwsearch/mechanize/trunk/test/test_browser.doctest wwwsearch/mechanize/trunk/test/test_browser.py Log: Fix selection of global form using .select_form() (titus at idyll.org) Modified: wwwsearch/mechanize/trunk/mechanize/_mechanize.py ============================================================================== --- wwwsearch/mechanize/trunk/mechanize/_mechanize.py (original) +++ wwwsearch/mechanize/trunk/mechanize/_mechanize.py Sat Jun 9 18:49:18 2007 @@ -493,7 +493,10 @@ nr, if supplied, is the sequence number of the form (where 0 is the first). Note that control 0 is the first form matching all the other arguments (if supplied); it is not necessarily the first control in the - form. + form. The "global form" (consisting of all form controls not contained + in any FORM element) is considered not to be part of this sequence and + to have no name, so will not be matched unless both name and nr are + None. 
""" if not self.viewing_html(): @@ -502,6 +505,12 @@ raise ValueError( "at least one argument must be supplied to specify form") + global_form = self._factory.global_form + if nr is None and name is None and \ + predicate is not None and predicate(global_form): + self.form = global_form + return + orig_nr = nr for form in self.forms(): if name is not None and name != form.name: Modified: wwwsearch/mechanize/trunk/test/test_browser.doctest ============================================================================== --- wwwsearch/mechanize/trunk/test/test_browser.doctest (original) +++ wwwsearch/mechanize/trunk/test/test_browser.doctest Sat Jun 9 18:49:18 2007 @@ -252,3 +252,28 @@ 1 >>> iter(br.forms()).next().find_control(nr=0).name 'b' + + + +.select_form() works with the global form + +>>> import ClientForm +>>> from mechanize._response import test_html_response +>>> br = TestBrowser2() +>>> br.visit_response(test_html_response("""\ +... +... +...
      +... +...
      +... """)) +>>> def has_a(form): +... try: +... form.find_control(name="a") +... except ClientForm.ControlNotFoundError: +... return False +... else: +... return True +>>> br.select_form(predicate=has_a) +>>> br.form.find_control(name="a").value +'b' Modified: wwwsearch/mechanize/trunk/test/test_browser.py ============================================================================== --- wwwsearch/mechanize/trunk/test/test_browser.py (original) +++ wwwsearch/mechanize/trunk/test/test_browser.py Sat Jun 9 18:49:18 2007 @@ -394,7 +394,7 @@ self.assertRaises(mechanize.FormNotFoundError, b.select_form, name="blah") self.assertRaises(mechanize.FormNotFoundError, b.select_form, - predicate=lambda x: True) + predicate=lambda form: form is not b.global_form()) self.assertRaises(mechanize.LinkNotFoundError, b.find_link, name="blah") self.assertRaises(mechanize.LinkNotFoundError, b.find_link, From jjlee at codespeak.net Tue Jun 12 23:39:13 2007 From: jjlee at codespeak.net (jjlee at codespeak.net) Date: Tue, 12 Jun 2007 23:39:13 +0200 (CEST) Subject: [wwwsearch-commits] r44214 - wwwsearch/mechanize/trunk Message-ID: <20070612213913.2AF8B81A6@code0.codespeak.net> Author: jjlee Date: Tue Jun 12 23:39:12 2007 New Revision: 44214 Modified: wwwsearch/mechanize/trunk/test.py Log: Tests no longer (accidentally) depend on third-party coverage module Modified: wwwsearch/mechanize/trunk/test.py ============================================================================== --- wwwsearch/mechanize/trunk/test.py (original) +++ wwwsearch/mechanize/trunk/test.py Tue Jun 12 23:39:12 2007 @@ -59,8 +59,8 @@ import doctest import testprogram - import coverage if run_coverage: + import coverage print 'running coverage' coverage.erase() coverage.start() From jjlee at codespeak.net Tue Jun 12 23:40:20 2007 From: jjlee at codespeak.net (jjlee at codespeak.net) Date: Tue, 12 Jun 2007 23:40:20 +0200 (CEST) Subject: [wwwsearch-commits] r44215 - wwwsearch/mechanize/trunk/mechanize Message-ID: 
<20070612214020.B710481A6@code0.codespeak.net> Author: jjlee Date: Tue Jun 12 23:40:20 2007 New Revision: 44215 Modified: wwwsearch/mechanize/trunk/mechanize/_mechanize.py Log: Fix docstring typo and remove empty docstring Modified: wwwsearch/mechanize/trunk/mechanize/_mechanize.py ============================================================================== --- wwwsearch/mechanize/trunk/mechanize/_mechanize.py (original) +++ wwwsearch/mechanize/trunk/mechanize/_mechanize.py Tue Jun 12 23:40:20 2007 @@ -379,7 +379,7 @@ The cookie is added in the same way as if it had arrived with the current response, as a result of the current request. This means that, - for example, it is not appropriate to set the cookie based on the + for example, if it is not appropriate to set the cookie based on the current request, no cookie will be set. The cookie will be returned automatically with subsequent responses @@ -452,7 +452,6 @@ return self._factory.is_html def encoding(self): - """""" if self._response is None: raise BrowserStateError("not viewing any document") return self._factory.encoding From jjlee at codespeak.net Sat Jun 16 23:49:03 2007 From: jjlee at codespeak.net (jjlee at codespeak.net) Date: Sat, 16 Jun 2007 23:49:03 +0200 (CEST) Subject: [wwwsearch-commits] r44313 - in wwwsearch/mechanize/trunk: mechanize test Message-ID: <20070616214903.501FA80E5@code0.codespeak.net> Author: jjlee Date: Sat Jun 16 23:48:59 2007 New Revision: 44313 Modified: wwwsearch/mechanize/trunk/mechanize/_html.py wwwsearch/mechanize/trunk/test/test_html.doctest Log: Fix BeautifulSoup RobustLinksFactory (hence RobustFactory) link text parsing for case of link text containing tags (titus at idyll.org) Modified: wwwsearch/mechanize/trunk/mechanize/_html.py ============================================================================== --- wwwsearch/mechanize/trunk/mechanize/_html.py (original) +++ wwwsearch/mechanize/trunk/mechanize/_html.py Sat Jun 16 23:48:59 2007 @@ -379,15 +379,15 @@ if not 
url: continue url = _rfc3986.clean_url(url, encoding) - text = link.firstText(lambda t: True) - if text is _beautifulsoup.Null: + text = link.fetchText(lambda t: True) + if not text: # follow _pullparser's weird behaviour rigidly if link.name == "a": text = "" else: text = None else: - text = self.compress_re.sub(" ", text.strip()) + text = self.compress_re.sub(" ", " ".join(text).strip()) yield Link(base_url, url, text, link.name, attrs) Modified: wwwsearch/mechanize/trunk/test/test_html.doctest ============================================================================== --- wwwsearch/mechanize/trunk/test/test_html.doctest (original) +++ wwwsearch/mechanize/trunk/test/test_html.doctest Sat Jun 16 23:48:59 2007 @@ -161,3 +161,55 @@ Traceback (most recent call last): ... StopIteration + + +Link text parsing + +>>> def get_first_link_text_bs(html): +... factory = RobustLinksFactory() +... soup = MechanizeBs("utf-8", html) +... factory.set_soup(soup, "http://example.com/", "utf-8") +... return list(factory.links())[0].text + +>>> def get_first_link_text_sgmllib(html): +... factory = LinksFactory() +... response = test_html_response(html) +... factory.set_response(response, "http://example.com/", "utf-8") +... return list(factory.links())[0].text + +Whitespace gets compressed down to single spaces. Tags are removed. + +>>> html = ("""\ +... Title +...

      The quick\tbrown fox jumps +... over the lazy dog +... +... """) +>>> get_first_link_text_bs(html) +'The quick brown fox jumps over the lazy dog' +>>> get_first_link_text_sgmllib(html) +'The quick brown fox jumps over the lazy dog' + +Empty links have empty link text + +>>> html = ("""\ +... Title +...

      +... +... """) +>>> get_first_link_text_bs(html) +'' +>>> get_first_link_text_sgmllib(html) +'' + +But for backwards-compatibility, empty non-<a> links have None link text + +>>> html = ("""\ +... Title +...

      +... +... """) +>>> print get_first_link_text_bs(html) +None +>>> print get_first_link_text_sgmllib(html) +None