[wwwsearch-commits] r26879 - in wwwsearch/mechanize/trunk: . mechanize test

jjlee at codespeak.net jjlee at codespeak.net
Sat May 6 18:57:29 CEST 2006


Author: jjlee
Date: Sat May  6 18:57:27 2006
New Revision: 26879

Modified:
   wwwsearch/mechanize/trunk/doc.html.in
   wwwsearch/mechanize/trunk/mechanize/_Opener.py
   wwwsearch/mechanize/trunk/mechanize/_Util.py
   wwwsearch/mechanize/trunk/mechanize/__init__.py
   wwwsearch/mechanize/trunk/mechanize/_mechanize.py
   wwwsearch/mechanize/trunk/mechanize/_urllib2_support.py
   wwwsearch/mechanize/trunk/mechanize/_useragent.py
   wwwsearch/mechanize/trunk/test/test_misc.py
   wwwsearch/mechanize/trunk/test/test_urllib2.py
Log:
Fix response upgrading; Add .get_data() method to responses; Extend processor mechanism so methods named .any_request() / .any_response() are called for all requests and responses (rather than just for matching URL schemes, as for e.g. .http_request() / .http_response()); Always use mechanize's own OpenerDirector

Modified: wwwsearch/mechanize/trunk/doc.html.in
==============================================================================
--- wwwsearch/mechanize/trunk/doc.html.in	(original)
+++ wwwsearch/mechanize/trunk/doc.html.in	Sat May  6 18:57:27 2006
@@ -349,8 +349,7 @@
 <dd><p>This makes ClientCookie's response objects <code>seek()</code>able.
 Seeking is done lazily (ie. the response object only reads from the socket as
 necessary, rather than slurping in all the data before the response is returned
-to you).  XXX only works for HTTP ATM, I think, and also doesn't work for
-HTTPError exceptions...
+to you).
 
 <dt><code>HTTPRefererProcessor</code>
 

Modified: wwwsearch/mechanize/trunk/mechanize/_Opener.py
==============================================================================
--- wwwsearch/mechanize/trunk/mechanize/_Opener.py	(original)
+++ wwwsearch/mechanize/trunk/mechanize/_Opener.py	Sat May  6 18:57:27 2006
@@ -42,65 +42,7 @@
     return names
 
 
-class OpenerMixin:
-    def _request(self, url_or_req, data):
-        if isstringlike(url_or_req):
-            req = Request(url_or_req, data)
-        else:
-            # already a urllib2.Request or mechanize.Request instance
-            req = url_or_req
-            if data is not None:
-                req.add_data(data)
-        return req
-
-    def retrieve(self, fullurl, filename=None, reporthook=None, data=None):
-        """Returns (filename, headers).
-
-        For remote objects, the default filename will refer to a temporary
-        file.
-
-        """
-        req = self._request(fullurl, data)
-        type_ = req.get_type()
-        fp = self.open(req)
-        headers = fp.info()
-        if filename is None and type == 'file':
-            return url2pathname(req.get_selector()), headers
-        if filename:
-            tfp = open(filename, 'wb')
-        else:
-            path = urlparse(fullurl)[2]
-            suffix = os.path.splitext(path)[1]
-            tfp = tempfile.TemporaryFile("wb", suffix=suffix)
-        result = filename, headers
-        bs = 1024*8
-        size = -1
-        read = 0
-        blocknum = 1
-        if reporthook:
-            if headers.has_key("content-length"):
-                size = int(headers["Content-Length"])
-            reporthook(0, bs, size)
-        while 1:
-            block = fp.read(bs)
-            read += len(block)
-            if reporthook:
-                reporthook(blocknum, bs, size)
-            blocknum = blocknum + 1
-            if not block:
-                break
-            tfp.write(block)
-        fp.close()
-        tfp.close()
-        del fp
-        del tfp
-        if size>=0 and read<size:
-            raise IOError("incomplete retrieval error",
-                          "got only %d bytes out of %d" % (read,size))
-        return result
-
-
-class OpenerDirector(urllib2.OpenerDirector, OpenerMixin):
+class OpenerDirector(urllib2.OpenerDirector):
     def __init__(self):
         urllib2.OpenerDirector.__init__(self)
         self.process_response = {}
@@ -141,32 +83,47 @@
             else:
                 lookup[kind] = [handler]
             added = True
-            continue
 
         if added:
             # XXX why does self.handlers need to be sorted?
             bisect.insort(self.handlers, handler)
             handler.add_parent(self)
 
+    def _request(self, url_or_req, data):
+        if isstringlike(url_or_req):
+            req = Request(url_or_req, data)
+        else:
+            # already a urllib2.Request or mechanize.Request instance
+            req = url_or_req
+            if data is not None:
+                req.add_data(data)
+        return req
+
     def open(self, fullurl, data=None):
         req = self._request(fullurl, data)
-        type_ = req.get_type()
+        req_scheme = req.get_type()
 
         # pre-process request
         # XXX should we allow a Processor to change the type (URL
         #   scheme) of the request?
-        meth_name = type_+"_request"
-        for processor in self.process_request.get(type_, []):
-            meth = getattr(processor, meth_name)
-            req = meth(req)
-
-        response = urllib2.OpenerDirector.open(self, req, data)
+        for scheme in ["any", req_scheme]:
+            meth_name = scheme+"_request"
+            for processor in self.process_request.get(scheme, []):
+                meth = getattr(processor, meth_name)
+                req = meth(req)
+
+        # In Python >= 2.4, .open() supports processors already, so we must
+        # call ._open() instead.
+        urlopen = getattr(urllib2.OpenerDirector, "_open",
+                          urllib2.OpenerDirector.open)
+        response = urlopen(self, req, data)
 
         # post-process response
-        meth_name = type_+"_response"
-        for processor in self.process_response.get(type_, []):
-            meth = getattr(processor, meth_name)
-            response = meth(req, response)
+        for scheme in ["any", req_scheme]:
+            meth_name = scheme+"_response"
+            for processor in self.process_response.get(scheme, []):
+                meth = getattr(processor, meth_name)
+                response = meth(req, response)
 
         return response
 
@@ -190,3 +147,49 @@
         if http_err:
             args = (dict, 'default', 'http_error_default') + orig_args
             return apply(self._call_chain, args)
+
+    def retrieve(self, fullurl, filename=None, reporthook=None, data=None):
+        """Returns (filename, headers).
+
+        For remote objects, the default filename will refer to a temporary
+        file.
+
+        """
+        req = self._request(fullurl, data)
+        type_ = req.get_type()
+        fp = self.open(req)
+        headers = fp.info()
+        if filename is None and type == 'file':
+            return url2pathname(req.get_selector()), headers
+        if filename:
+            tfp = open(filename, 'wb')
+        else:
+            path = urlparse(fullurl)[2]
+            suffix = os.path.splitext(path)[1]
+            tfp = tempfile.TemporaryFile("wb", suffix=suffix)
+        result = filename, headers
+        bs = 1024*8
+        size = -1
+        read = 0
+        blocknum = 1
+        if reporthook:
+            if headers.has_key("content-length"):
+                size = int(headers["Content-Length"])
+            reporthook(0, bs, size)
+        while 1:
+            block = fp.read(bs)
+            read += len(block)
+            if reporthook:
+                reporthook(blocknum, bs, size)
+            blocknum = blocknum + 1
+            if not block:
+                break
+            tfp.write(block)
+        fp.close()
+        tfp.close()
+        del fp
+        del tfp
+        if size>=0 and read<size:
+            raise IOError("incomplete retrieval error",
+                          "got only %d bytes out of %d" % (read,size))
+        return result

Modified: wwwsearch/mechanize/trunk/mechanize/_Util.py
==============================================================================
--- wwwsearch/mechanize/trunk/mechanize/_Util.py	(original)
+++ wwwsearch/mechanize/trunk/mechanize/_Util.py	Sat May  6 18:57:27 2006
@@ -411,6 +411,14 @@
         cpy.__cache = self.__cache
         return cpy
 
+    def get_data(self):
+        pos = self.__pos
+        try:
+            self.seek(0)
+            return self.read(-1)
+        finally:
+            self.__pos = pos
+
     def read(self, size=-1):
         pos = self.__pos
         end = len(self.__cache.getvalue())

Modified: wwwsearch/mechanize/trunk/mechanize/__init__.py
==============================================================================
--- wwwsearch/mechanize/trunk/mechanize/__init__.py	(original)
+++ wwwsearch/mechanize/trunk/mechanize/__init__.py	Sat May  6 18:57:27 2006
@@ -20,9 +20,10 @@
 from _MSIECookieJar import MSIECookieJar
 from _urllib2_support import \
      Request, \
-     OpenerDirector, build_opener, install_opener, urlopen, \
+     build_opener, install_opener, urlopen, \
      OpenerFactory, urlretrieve, BaseHandler, HeadParser, \
      RobotExclusionError
+from _Opener import OpenerDirector
 try:
     from _urllib2_support import XHTMLCompatibleHeadParser
 except ImportError:

Modified: wwwsearch/mechanize/trunk/mechanize/_mechanize.py
==============================================================================
--- wwwsearch/mechanize/trunk/mechanize/_mechanize.py	(original)
+++ wwwsearch/mechanize/trunk/mechanize/_mechanize.py	Sat May  6 18:57:27 2006
@@ -24,7 +24,7 @@
 
 from _useragent import UserAgent
 from _html import DefaultFactory
-from _Util import response_seek_wrapper
+from _Util import response_seek_wrapper, closeable_response
 import _Request
 
 __version__ = (0, 1, 0, "a", None)  # 0.1.0a
@@ -60,13 +60,43 @@
             response.close()
         del self._history[:]
 
+# Horrible, but needed, at least until fork urllib2.  Even then, may want
+# to preserve urllib2 compatibility.
+def upgrade_response(response):
+    # a urllib2 handler constructed the response, i.e. the response is an
+    # urllib.addinfourl, instead of a _Util.closeable_response as returned
+    # by e.g. mechanize.HTTPHandler
+    try:
+        code = response.code
+    except AttributeError:
+        code = None
+    try:
+        msg = response.msg
+    except AttributeError:
+        msg = None
+
+    # may have already-.read() data from .seek() cache
+    data = None
+    get_data = getattr(response, "get_data", None)
+    if get_data:
+        data = get_data()
+
+    response = closeable_response(
+        response.fp, response.info(), response.geturl(), code, msg)
+    response = response_seek_wrapper(response)
+    if data:
+        response.set_data(data)
+    return response
+class ResponseUpgradeProcessor(urllib2.BaseHandler):
+    # upgrade responses to be .close()able without becoming unusable
+    handler_order = 0  # before anything else
+    def any_response(self, request, response):
+        if not hasattr(response, 'closeable_response'):
+            response = upgrade_response(response)
+        return response
 
-if sys.version_info[:2] >= (2, 4):
-    from _Opener import OpenerMixin
-else:
-    class OpenerMixin: pass
 
-class Browser(UserAgent, OpenerMixin):
+class Browser(UserAgent):
     """Browser-like class with support for history, forms and links.
 
     BrowserStateError is raised whenever the browser is in the wrong state to
@@ -81,6 +111,11 @@
 
     """
 
+    handler_classes = UserAgent.handler_classes.copy()
+    handler_classes["_response_upgrade"] = ResponseUpgradeProcessor
+    default_others = copy.copy(UserAgent.default_others)
+    default_others.append("_response_upgrade")
+
     def __init__(self,
                  factory=None,
                  history=None,
@@ -193,7 +228,6 @@
 
     def set_response(self, response):
         """Replace current response with (a copy of) response."""
-        from _Util import closeable_response
         # sanity check, necessary but far from sufficient
         if not (hasattr(response, "info") and hasattr(response, "geturl") and
                 hasattr(response, "read")):
@@ -201,32 +235,13 @@
 
         self.form = None
 
-        # XXX bleah!!
-
-        if not hasattr(response, 'closeable_response'):
-            # we expect to get here if a urllib2 handler constructed the
-            # response, i.e. the response is an urllib.addinfourl, instead of a
-            # _Util.closeable_response as returned by
-            # e.g. mechanize.HTTPHandler
-            try:
-                code = response.code
-            except AttributeError:
-                code = None
-            try:
-                msg = response.msg
-            except AttributeError:
-                msg = None
-            # assume response has an .fp attribute, the socket fileobject
-            # (i.e. is an urllib.addinfourl, really).
-            response = closeable_response(
-                response.fp, response.info(), response.geturl(), code, msg)
         if not hasattr(response, "seek"):
             response = response_seek_wrapper(response)
-        # 0) don't want to copy here, but
-        # 1) don't want to copy some of the time and not other times
-        # 2) need response to be .close()able and .seek()able
-        # 3) 2) and 1) imply must always be copy.copy()ed
-        response = copy.copy(response)
+        if not hasattr(response, "closeable_response"):
+            response = ResponseUpgradeProcessor().any_response(
+                'junk', response)
+        else:
+            response = copy.copy(response)
 
         self._response = response
         self._factory.set_response(self._response)

Modified: wwwsearch/mechanize/trunk/mechanize/_urllib2_support.py
==============================================================================
--- wwwsearch/mechanize/trunk/mechanize/_urllib2_support.py	(original)
+++ wwwsearch/mechanize/trunk/mechanize/_urllib2_support.py	Sat May  6 18:57:27 2006
@@ -14,6 +14,7 @@
 import copy, time, tempfile, htmlentitydefs, re
 
 from _ClientCookie import CookieJar, request_host
+import _Opener
 from _Util import isstringlike, startswith, getheaders, closeable_response
 from _HeadersUtil import is_html
 from _Debug import getLogger
@@ -406,18 +407,14 @@
 
         https_response = http_response
 
-    # XXX ATM this only takes notice of http responses -- probably
-    #   should be independent of protocol scheme (http, ftp, etc.)
     class SeekableProcessor(BaseHandler):
         """Make responses seekable."""
 
-        def http_response(self, request, response):
+        def any_response(self, request, response):
             if not hasattr(response, "seek"):
                 return response_seek_wrapper(response)
             return response
 
-        https_response = http_response
-
     class HTTPCookieProcessor(BaseHandler):
         """Handle HTTP cookies.
 
@@ -731,14 +728,6 @@
 
 ##             https_request = AbstractHTTPHandler.do_request_
 
-    if int(10*float(urllib2.__version__[:3])) >= 24:
-        # urllib2 supports processors already
-        from _Opener import OpenerMixin
-        class OpenerDirector(urllib2.OpenerDirector, OpenerMixin):
-            pass
-    else:
-        from _Opener import OpenerDirector
-
     class OpenerFactory:
         """This class's interface is quite likely to change."""
 
@@ -763,7 +752,7 @@
         handlers = []
         replacement_handlers = []
 
-        def __init__(self, klass=OpenerDirector):
+        def __init__(self, klass=_Opener.OpenerDirector):
             self.klass = klass
 
         def build_opener(self, *handlers):

Modified: wwwsearch/mechanize/trunk/mechanize/_useragent.py
==============================================================================
--- wwwsearch/mechanize/trunk/mechanize/_useragent.py	(original)
+++ wwwsearch/mechanize/trunk/mechanize/_useragent.py	Sat May  6 18:57:27 2006
@@ -14,10 +14,10 @@
 import sys
 import urllib2
 
+from _Opener import OpenerDirector
 if sys.version_info[:2] >= (2, 4):
-    from urllib2 import OpenerDirector, BaseHandler, HTTPErrorProcessor
+    from urllib2 import BaseHandler, HTTPErrorProcessor
 else:
-    from _Opener import OpenerDirector
     from _urllib2_support import BaseHandler, HTTPErrorProcessor
 
 import _urllib2_support

Modified: wwwsearch/mechanize/trunk/test/test_misc.py
==============================================================================
--- wwwsearch/mechanize/trunk/test/test_misc.py	(original)
+++ wwwsearch/mechanize/trunk/test/test_misc.py	Sat May  6 18:57:27 2006
@@ -202,28 +202,12 @@
         rsw.seek(0)
         self._test4(rsw)
 
-    def testSetResponseData(self):
+    def testGetResponseData(self):
         from mechanize import response_seek_wrapper
         r = TestUnSeekableResponse(self.text, {'blah': 'yawn'})
         rsw = response_seek_wrapper(r)
-        rsw.set_data("""\
-A Seeming somwhat more than View;
-  That doth instruct the Mind
-  In Things that ly behind,
-""")
-        self.assertEqual(rsw.read(9), "A Seeming")
-        self.assertEqual(rsw.read(13), " somwhat more")
-        rsw.seek(0)
-        self.assertEqual(rsw.read(9), "A Seeming")
-        self.assertEqual(rsw.readline(), " somwhat more than View;\n")
-        rsw.seek(0)
-        self.assertEqual(rsw.readline(), "A Seeming somwhat more than View;\n")
-        rsw.seek(-1, 1)
-        self.assertEqual(rsw.read(7), "\n  That")
 
-        r = TestUnSeekableResponse(self.text, {'blah': 'yawn'})
-        rsw = response_seek_wrapper(r)
-        rsw.set_data(self.text)
+        self.assertEqual(rsw.get_data(), self.text)
         self._test2(rsw)
         rsw.seek(0)
         self._test4(rsw)

Modified: wwwsearch/mechanize/trunk/test/test_urllib2.py
==============================================================================
--- wwwsearch/mechanize/trunk/test/test_urllib2.py	(original)
+++ wwwsearch/mechanize/trunk/test/test_urllib2.py	Sat May  6 18:57:27 2006
@@ -225,11 +225,12 @@
 
         req = Request("http://example.com/")
         r = o.open(req)
+
         # processor methods are called on *all* handlers that define them,
         # not just the first handler
         calls = [(handlers[0], "http_request"), (handlers[1], "http_request"),
                  (handlers[0], "http_response"), (handlers[1], "http_response")]
-
+        self.assertEqual(len(o.calls), len(calls))
         for i in range(len(o.calls)):
             handler, name, args, kwds = o.calls[i]
             if i < 2:
@@ -247,6 +248,47 @@
                 self.assert_(args[1] is None or
                              isinstance(args[1], MockResponse))
 
+    def test_any(self):
+        o = OpenerDirector()
+        meth_spec = [[
+            ("http_request", "return request"),
+            ("http_response", "return response"),
+            ("ftp_request", "return request"),
+            ("ftp_response", "return response"),
+            ("any_request", "return request"),
+            ("any_response", "return response"),
+            ]]
+        handlers = add_ordered_mock_handlers(o, meth_spec)
+        handler = handlers[0]
+
+        for scheme in ["http", "ftp"]:
+            o.calls = []
+            req = Request("%s://example.com/" % scheme)
+            r = o.open(req)
+
+            calls = [(handler, "any_request"),
+                     (handler, ("%s_request" % scheme)),
+                     (handler, "any_response"),
+                     (handler, ("%s_response" % scheme)),
+                     ]
+            #self.assertEqual(len(o.calls), len(calls))
+            for i, ((handler, name, args, kwds), calls) in (
+                enumerate(zip(o.calls, calls))):
+                if i < 2:
+                    # *_request
+                    self.assert_((handler, name) == calls)
+                    self.assert_(len(args) == 1)
+                    self.assert_(isinstance(args[0], Request))
+                else:
+                    # *_response
+                    self.assert_((handler, name) == calls)
+                    self.assert_(len(args) == 2)
+                    self.assert_(isinstance(args[0], Request))
+                    # response from opener.open is None, because there's no
+                    # handler that defines http_open to handle it
+                    self.assert_(args[1] is None or
+                                 isinstance(args[1], MockResponse))
+
 
 class MockHTTPResponse:
     def __init__(self, fp, msg, status, reason):
@@ -668,7 +710,7 @@
             def info(self): pass
             def geturl(self): return ""
         r = MockUnseekableResponse()
-        newr = h.http_response(req, r)
+        newr = h.any_response(req, r)
         self.assert_(not hasattr(r, "seek"))
         self.assert_(hasattr(newr, "seek"))
 


More information about the wwwsearch-commits mailing list