[wwwsearch-commits] r32996 - in wwwsearch/mechanize/trunk: . mechanize test
jjlee at codespeak.net
jjlee at codespeak.net
Sun Oct 8 01:34:38 CEST 2006
Author: jjlee
Date: Sun Oct 8 01:34:33 2006
New Revision: 32996
Modified:
wwwsearch/mechanize/trunk/functional_tests.py
wwwsearch/mechanize/trunk/mechanize/_http.py
wwwsearch/mechanize/trunk/mechanize/_mechanize.py
wwwsearch/mechanize/trunk/mechanize/_opener.py
wwwsearch/mechanize/trunk/mechanize/_request.py
wwwsearch/mechanize/trunk/mechanize/_upgrade.py
wwwsearch/mechanize/trunk/test/test_browser.doctest
wwwsearch/mechanize/trunk/test/test_urllib2.py
Log:
Add Browser.open_novisit() and Request.visit -- this fixes an issue where the internal open of robots.txt would affect browser state. Also fix test_redirect in functional_tests.py (it wasn't actually redirecting).
Modified: wwwsearch/mechanize/trunk/functional_tests.py
==============================================================================
--- wwwsearch/mechanize/trunk/functional_tests.py (original)
+++ wwwsearch/mechanize/trunk/functional_tests.py Sun Oct 8 01:34:33 2006
@@ -60,13 +60,27 @@
self.assertEqual(self.browser.title(), 'Python bits')
def test_redirect(self):
- # 302 redirect due to missing final '/'
- self.browser.open('http://wwwsearch.sourceforge.net')
+ # 301 redirect due to missing final '/'
+ r = self.browser.open('http://wwwsearch.sourceforge.net/bits')
+ self.assertEqual(r.code, 200)
+ self.assert_("GeneralFAQ.html" in r.read(2048))
def test_file_url(self):
url = "file://%s" % sanepathname2url(
os.path.abspath('functional_tests.py'))
- self.browser.open(url)
+ r = self.browser.open(url)
+ self.assert_("this string appears in this file ;-)" in r.read())
+
+ def test_open_novisit(self):
+ def test_state(br):
+ self.assert_(br.request is None)
+ self.assert_(br.response() is None)
+ self.assertRaises(mechanize.BrowserStateError, br.back)
+ test_state(self.browser)
+ # note this involves a redirect, which should itself be non-visiting
+ r = self.browser.open_novisit("http://wwwsearch.sourceforge.net/bits")
+ test_state(self.browser)
+ self.assert_("GeneralFAQ.html" in r.read(2048))
class ResponseTests(TestCase):
Modified: wwwsearch/mechanize/trunk/mechanize/_http.py
==============================================================================
--- wwwsearch/mechanize/trunk/mechanize/_http.py (original)
+++ wwwsearch/mechanize/trunk/mechanize/_http.py Sun Oct 8 01:34:33 2006
@@ -76,10 +76,16 @@
# from the user (of urllib2, in this case). In practice,
# essentially all clients do redirect in this case, so we do
# the same.
+ try:
+ visit = req.visit
+ except AttributeError:
+ visit = None
return Request(newurl,
headers=req.headers,
origin_req_host=req.get_origin_req_host(),
- unverifiable=True)
+ unverifiable=True,
+ visit=visit,
+ )
else:
raise HTTPError(req.get_full_url(), code, msg, headers, fp)
@@ -348,8 +354,9 @@
"""Reads the robots.txt URL and feeds it to the parser."""
if self._opener is None:
self.set_opener()
+ req = Request(self.url, unverifiable=True, visit=False)
try:
- f = self._opener.open(self.url)
+ f = self._opener.open(req)
except HTTPError, f:
pass
except (IOError, socket.error, OSError), exc:
Modified: wwwsearch/mechanize/trunk/mechanize/_mechanize.py
==============================================================================
--- wwwsearch/mechanize/trunk/mechanize/_mechanize.py (original)
+++ wwwsearch/mechanize/trunk/mechanize/_mechanize.py Sun Oct 8 01:34:33 2006
@@ -135,12 +135,25 @@
self.select_form = self.click = self.submit = self.click_link = None
self.follow_link = self.find_link = None
+ def open_novisit(self, url, data=None):
+ """Open a URL without visiting it.
+
+ The browser state (including .request, .response(), history, forms and
+ links) are all left unchanged by calling this function.
+
+ The interface is the same as for .open().
+
+ This is useful for things like fetching images.
+
+ See also .retrieve().
+
+ """
+ return self._mech_open(url, data, visit=False)
+
def open(self, url, data=None):
- if self._response is not None:
- self._response.close()
return self._mech_open(url, data)
- def _mech_open(self, url, data=None, update_history=True):
+ def _mech_open(self, url, data=None, update_history=True, visit=None):
try:
url.get_full_url
except AttributeError:
@@ -154,16 +167,23 @@
"can't fetch relative URL: not viewing any document")
url = urlparse.urljoin(self._response.geturl(), url)
- if self.request is not None and update_history:
- self._history.add(self.request, self._response)
- self._response = None
- # we want self.request to be assigned even if UserAgent.open fails
- self.request = self._request(url, data)
- self._previous_scheme = self.request.get_type()
+ request = self._request(url, data, visit)
+ visit = request.visit
+ if visit is None:
+ visit = True
+
+ if visit:
+ if self._response is not None:
+ self._response.close()
+ if self.request is not None and update_history:
+ self._history.add(self.request, self._response)
+ self._response = None
+ # we want self.request to be assigned even if UserAgent.open fails
+ self.request = request
success = True
try:
- response = UserAgent.open(self, self.request, data)
+ response = UserAgent.open(self, request, data)
except urllib2.HTTPError, error:
success = False
if error.fp is None: # not a response
@@ -180,9 +200,12 @@
## # Python core, a fix would need some backwards-compat. hack to be
## # acceptable.
## raise
- self.set_response(response)
- response = copy.copy(self._response)
+ if visit:
+ self.set_response(response)
+ response = copy.copy(self._response)
+ elif response is not None:
+ response = _upgrade.upgrade_response(response)
if not success:
raise response
Modified: wwwsearch/mechanize/trunk/mechanize/_opener.py
==============================================================================
--- wwwsearch/mechanize/trunk/mechanize/_opener.py (original)
+++ wwwsearch/mechanize/trunk/mechanize/_opener.py Sun Oct 8 01:34:33 2006
@@ -136,18 +136,25 @@
self._any_request = any_request
self._any_response = any_response
- def _request(self, url_or_req, data):
+ def _request(self, url_or_req, data, visit):
if isstringlike(url_or_req):
- req = Request(url_or_req, data)
+ req = Request(url_or_req, data, visit=visit)
else:
# already a urllib2.Request or mechanize.Request instance
req = url_or_req
if data is not None:
req.add_data(data)
+ # XXX yuck, give request a .visit attribute if it doesn't have one
+ try:
+ req.visit
+ except AttributeError:
+ req.visit = None
+ if visit is not None:
+ req.visit = visit
return req
def open(self, fullurl, data=None):
- req = self._request(fullurl, data)
+ req = self._request(fullurl, data, None)
req_scheme = req.get_type()
self._maybe_reindex_handlers()
@@ -222,7 +229,7 @@
headers) that would have been returned.
"""
- req = self._request(fullurl, data)
+ req = self._request(fullurl, data, False)
scheme = req.get_type()
fp = self.open(req)
headers = fp.info()
Modified: wwwsearch/mechanize/trunk/mechanize/_request.py
==============================================================================
--- wwwsearch/mechanize/trunk/mechanize/_request.py (original)
+++ wwwsearch/mechanize/trunk/mechanize/_request.py Sun Oct 8 01:34:33 2006
@@ -16,10 +16,11 @@
class Request(urllib2.Request):
def __init__(self, url, data=None, headers={},
- origin_req_host=None, unverifiable=False):
+ origin_req_host=None, unverifiable=False, visit=None):
urllib2.Request.__init__(self, url, data, headers)
self.selector = None
self.unredirected_hdrs = {}
+ self.visit = visit
# All the terminology below comes from RFC 2965.
self.unverifiable = unverifiable
Modified: wwwsearch/mechanize/trunk/mechanize/_upgrade.py
==============================================================================
--- wwwsearch/mechanize/trunk/mechanize/_upgrade.py (original)
+++ wwwsearch/mechanize/trunk/mechanize/_upgrade.py Sun Oct 8 01:34:33 2006
@@ -17,6 +17,8 @@
except AttributeError: pass
try: newrequest.unverifiable = request.unverifiable
except AttributeError: pass
+ try: newrequest.visit = request.visit
+ except AttributeError: pass
request = newrequest
return request
Modified: wwwsearch/mechanize/trunk/test/test_browser.doctest
==============================================================================
--- wwwsearch/mechanize/trunk/test/test_browser.doctest (original)
+++ wwwsearch/mechanize/trunk/test/test_browser.doctest Sun Oct 8 01:34:33 2006
@@ -67,3 +67,34 @@
True
>>> len(br._history._history)
0
+
+
+.open()ing a Request with False .visit does not affect Browser state.
+Redirections during such a non-visiting request should also be
+non-visiting.
+
+>>> from mechanize import BrowserStateError, Request, HTTPRedirectHandler
+>>> from test_urllib2 import MockHTTPHandler
+
+>>> req = Request("http://example.com")
+>>> req.visit = False
+>>> br = TestBrowser2()
+>>> hh = MockHTTPHandler(301, "Location: http://example.com/\r\n\r\n")
+>>> br.add_handler(hh)
+>>> br.add_handler(HTTPRedirectHandler())
+>>> def raises(exc_class, fn, *args, **kwds):
+... try:
+... fn(*args, **kwds)
+... except exc_class, exc:
+... return True
+... return False
+>>> def test_state(br):
+... return (br.request is None and
+... br.response() is None and
+... raises(BrowserStateError, br.back)
+... )
+>>> test_state(br)
+True
+>>> r = br.open(req)
+>>> test_state(br)
+True
Modified: wwwsearch/mechanize/trunk/test/test_urllib2.py
==============================================================================
--- wwwsearch/mechanize/trunk/test/test_urllib2.py (original)
+++ wwwsearch/mechanize/trunk/test/test_urllib2.py Sun Oct 8 01:34:33 2006
@@ -22,6 +22,7 @@
import mechanize
from mechanize._http import AbstractHTTPHandler, parse_head
+from mechanize._response import test_response
from mechanize import HTTPRedirectHandler, HTTPRequestUpgradeProcessor, \
HTTPEquivProcessor, HTTPRefreshProcessor, SeekableProcessor, \
HTTPCookieProcessor, HTTPRefererProcessor, \
@@ -1092,11 +1093,10 @@
self._count = self._count + 1
msg = mimetools.Message(StringIO(self.headers))
return self.parent.error(
- "http", req, MockFile(), self.code, "Blah", msg)
+ "http", req, test_response(), self.code, "Blah", msg)
else:
self.req = req
- msg = mimetools.Message(StringIO("\r\n\r\n"))
- return MockResponse(200, "OK", msg, "", req.get_full_url())
+ return test_response("", [], req.get_full_url())
class MyHTTPHandler(HTTPHandler): pass
More information about the wwwsearch-commits
mailing list