[wwwsearch-commits] r32996 - in wwwsearch/mechanize/trunk: . mechanize test

jjlee at codespeak.net jjlee at codespeak.net
Sun Oct 8 01:34:38 CEST 2006


Author: jjlee
Date: Sun Oct  8 01:34:33 2006
New Revision: 32996

Modified:
   wwwsearch/mechanize/trunk/functional_tests.py
   wwwsearch/mechanize/trunk/mechanize/_http.py
   wwwsearch/mechanize/trunk/mechanize/_mechanize.py
   wwwsearch/mechanize/trunk/mechanize/_opener.py
   wwwsearch/mechanize/trunk/mechanize/_request.py
   wwwsearch/mechanize/trunk/mechanize/_upgrade.py
   wwwsearch/mechanize/trunk/test/test_browser.doctest
   wwwsearch/mechanize/trunk/test/test_urllib2.py
Log:
Add Browser.open_novisit() and Request.visit -- this fixes an issue where the internal open of robots.txt would affect browser state. Also fix test_redirect in functional_tests.py (it wasn't actually redirecting).

Modified: wwwsearch/mechanize/trunk/functional_tests.py
==============================================================================
--- wwwsearch/mechanize/trunk/functional_tests.py	(original)
+++ wwwsearch/mechanize/trunk/functional_tests.py	Sun Oct  8 01:34:33 2006
@@ -60,13 +60,27 @@
         self.assertEqual(self.browser.title(), 'Python bits')
 
     def test_redirect(self):
-        # 302 redirect due to missing final '/'
-        self.browser.open('http://wwwsearch.sourceforge.net')
+        # 301 redirect due to missing final '/'
+        r = self.browser.open('http://wwwsearch.sourceforge.net/bits')
+        self.assertEqual(r.code, 200)
+        self.assert_("GeneralFAQ.html" in r.read(2048))
 
     def test_file_url(self):
         url = "file://%s" % sanepathname2url(
             os.path.abspath('functional_tests.py'))
-        self.browser.open(url)
+        r = self.browser.open(url)
+        self.assert_("this string appears in this file ;-)" in r.read())
+
+    def test_open_novisit(self):
+        def test_state(br):
+            self.assert_(br.request is None)
+            self.assert_(br.response() is None)
+            self.assertRaises(mechanize.BrowserStateError, br.back)
+        test_state(self.browser)
+        # note this involves a redirect, which should itself be non-visiting
+        r = self.browser.open_novisit("http://wwwsearch.sourceforge.net/bits")
+        test_state(self.browser)
+        self.assert_("GeneralFAQ.html" in r.read(2048))
 
 
 class ResponseTests(TestCase):

Modified: wwwsearch/mechanize/trunk/mechanize/_http.py
==============================================================================
--- wwwsearch/mechanize/trunk/mechanize/_http.py	(original)
+++ wwwsearch/mechanize/trunk/mechanize/_http.py	Sun Oct  8 01:34:33 2006
@@ -76,10 +76,16 @@
             # from the user (of urllib2, in this case).  In practice,
             # essentially all clients do redirect in this case, so we do
             # the same.
+            try:
+                visit = req.visit
+            except AttributeError:
+                visit = None
             return Request(newurl,
                            headers=req.headers,
                            origin_req_host=req.get_origin_req_host(),
-                           unverifiable=True)
+                           unverifiable=True,
+                           visit=visit,
+                           )
         else:
             raise HTTPError(req.get_full_url(), code, msg, headers, fp)
 
@@ -348,8 +354,9 @@
             """Reads the robots.txt URL and feeds it to the parser."""
             if self._opener is None:
                 self.set_opener()
+            req = Request(self.url, unverifiable=True, visit=False)
             try:
-                f = self._opener.open(self.url)
+                f = self._opener.open(req)
             except HTTPError, f:
                 pass
             except (IOError, socket.error, OSError), exc:

Modified: wwwsearch/mechanize/trunk/mechanize/_mechanize.py
==============================================================================
--- wwwsearch/mechanize/trunk/mechanize/_mechanize.py	(original)
+++ wwwsearch/mechanize/trunk/mechanize/_mechanize.py	Sun Oct  8 01:34:33 2006
@@ -135,12 +135,25 @@
         self.select_form = self.click = self.submit = self.click_link = None
         self.follow_link = self.find_link = None
 
+    def open_novisit(self, url, data=None):
+        """Open a URL without visiting it.
+
+        The browser state (including .request, .response(), history, forms and
+        links) is left unchanged by calling this function.
+
+        The interface is the same as for .open().
+
+        This is useful for things like fetching images.
+
+        See also .retrieve().
+
+        """
+        return self._mech_open(url, data, visit=False)
+
     def open(self, url, data=None):
-        if self._response is not None:
-            self._response.close()
         return self._mech_open(url, data)
 
-    def _mech_open(self, url, data=None, update_history=True):
+    def _mech_open(self, url, data=None, update_history=True, visit=None):
         try:
             url.get_full_url
         except AttributeError:
@@ -154,16 +167,23 @@
                         "can't fetch relative URL: not viewing any document")
                 url = urlparse.urljoin(self._response.geturl(), url)
 
-        if self.request is not None and update_history:
-            self._history.add(self.request, self._response)
-        self._response = None
-        # we want self.request to be assigned even if UserAgent.open fails
-        self.request = self._request(url, data)
-        self._previous_scheme = self.request.get_type()
+        request = self._request(url, data, visit)
+        visit = request.visit
+        if visit is None:
+            visit = True
+
+        if visit:
+            if self._response is not None:
+                self._response.close()
+            if self.request is not None and update_history:
+                self._history.add(self.request, self._response)
+            self._response = None
+            # we want self.request to be assigned even if UserAgent.open fails
+            self.request = request
 
         success = True
         try:
-            response = UserAgent.open(self, self.request, data)
+            response = UserAgent.open(self, request, data)
         except urllib2.HTTPError, error:
             success = False
             if error.fp is None:  # not a response
@@ -180,9 +200,12 @@
 ##             # Python core, a fix would need some backwards-compat. hack to be
 ##             # acceptable.
 ##             raise
-        self.set_response(response)
 
-        response = copy.copy(self._response)
+        if visit:
+            self.set_response(response)
+            response = copy.copy(self._response)
+        elif response is not None:
+            response = _upgrade.upgrade_response(response)
 
         if not success:
             raise response

Modified: wwwsearch/mechanize/trunk/mechanize/_opener.py
==============================================================================
--- wwwsearch/mechanize/trunk/mechanize/_opener.py	(original)
+++ wwwsearch/mechanize/trunk/mechanize/_opener.py	Sun Oct  8 01:34:33 2006
@@ -136,18 +136,25 @@
         self._any_request = any_request
         self._any_response = any_response
 
-    def _request(self, url_or_req, data):
+    def _request(self, url_or_req, data, visit):
         if isstringlike(url_or_req):
-            req = Request(url_or_req, data)
+            req = Request(url_or_req, data, visit=visit)
         else:
             # already a urllib2.Request or mechanize.Request instance
             req = url_or_req
             if data is not None:
                 req.add_data(data)
+            # XXX yuck, give request a .visit attribute if it doesn't have one
+            try:
+                req.visit
+            except AttributeError:
+                req.visit = None
+            if visit is not None:
+                req.visit = visit
         return req
 
     def open(self, fullurl, data=None):
-        req = self._request(fullurl, data)
+        req = self._request(fullurl, data, None)
         req_scheme = req.get_type()
 
         self._maybe_reindex_handlers()
@@ -222,7 +229,7 @@
         headers) that would have been returned.
 
         """
-        req = self._request(fullurl, data)
+        req = self._request(fullurl, data, False)
         scheme = req.get_type()
         fp = self.open(req)
         headers = fp.info()

Modified: wwwsearch/mechanize/trunk/mechanize/_request.py
==============================================================================
--- wwwsearch/mechanize/trunk/mechanize/_request.py	(original)
+++ wwwsearch/mechanize/trunk/mechanize/_request.py	Sun Oct  8 01:34:33 2006
@@ -16,10 +16,11 @@
 
 class Request(urllib2.Request):
     def __init__(self, url, data=None, headers={},
-                 origin_req_host=None, unverifiable=False):
+                 origin_req_host=None, unverifiable=False, visit=None):
         urllib2.Request.__init__(self, url, data, headers)
         self.selector = None
         self.unredirected_hdrs = {}
+        self.visit = visit
 
         # All the terminology below comes from RFC 2965.
         self.unverifiable = unverifiable

Modified: wwwsearch/mechanize/trunk/mechanize/_upgrade.py
==============================================================================
--- wwwsearch/mechanize/trunk/mechanize/_upgrade.py	(original)
+++ wwwsearch/mechanize/trunk/mechanize/_upgrade.py	Sun Oct  8 01:34:33 2006
@@ -17,6 +17,8 @@
             except AttributeError: pass
             try: newrequest.unverifiable = request.unverifiable
             except AttributeError: pass
+            try: newrequest.visit = request.visit
+            except AttributeError: pass
             request = newrequest
         return request
 

Modified: wwwsearch/mechanize/trunk/test/test_browser.doctest
==============================================================================
--- wwwsearch/mechanize/trunk/test/test_browser.doctest	(original)
+++ wwwsearch/mechanize/trunk/test/test_browser.doctest	Sun Oct  8 01:34:33 2006
@@ -67,3 +67,34 @@
 True
 >>> len(br._history._history)
 0
+
+
+.open()ing a Request with False .visit does not affect Browser state.
+Redirections during such a non-visiting request should also be
+non-visiting.
+
+>>> from mechanize import BrowserStateError, Request, HTTPRedirectHandler
+>>> from test_urllib2 import MockHTTPHandler
+
+>>> req = Request("http://example.com")
+>>> req.visit = False
+>>> br = TestBrowser2()
+>>> hh = MockHTTPHandler(301, "Location: http://example.com/\r\n\r\n")
+>>> br.add_handler(hh)
+>>> br.add_handler(HTTPRedirectHandler())
+>>> def raises(exc_class, fn, *args, **kwds):
+...     try:
+...         fn(*args, **kwds)
+...     except exc_class, exc:
+...         return True
+...     return False
+>>> def test_state(br):
+...     return (br.request is None and
+...             br.response() is None and
+...             raises(BrowserStateError, br.back)
+...             )
+>>> test_state(br)
+True
+>>> r = br.open(req)
+>>> test_state(br)
+True

Modified: wwwsearch/mechanize/trunk/test/test_urllib2.py
==============================================================================
--- wwwsearch/mechanize/trunk/test/test_urllib2.py	(original)
+++ wwwsearch/mechanize/trunk/test/test_urllib2.py	Sun Oct  8 01:34:33 2006
@@ -22,6 +22,7 @@
 import mechanize
 
 from mechanize._http import AbstractHTTPHandler, parse_head
+from mechanize._response import test_response
 from mechanize import HTTPRedirectHandler, HTTPRequestUpgradeProcessor, \
      HTTPEquivProcessor, HTTPRefreshProcessor, SeekableProcessor, \
      HTTPCookieProcessor, HTTPRefererProcessor, \
@@ -1092,11 +1093,10 @@
             self._count = self._count + 1
             msg = mimetools.Message(StringIO(self.headers))
             return self.parent.error(
-                "http", req, MockFile(), self.code, "Blah", msg)
+                "http", req, test_response(), self.code, "Blah", msg)
         else:
             self.req = req
-            msg = mimetools.Message(StringIO("\r\n\r\n"))
-            return MockResponse(200, "OK", msg, "", req.get_full_url())
+            return test_response("", [], req.get_full_url())
 
 
 class MyHTTPHandler(HTTPHandler): pass


More information about the wwwsearch-commits mailing list