[wwwsearch-commits] r26879 - in wwwsearch/mechanize/trunk: .
mechanize test
jjlee at codespeak.net
jjlee at codespeak.net
Sat May 6 18:57:29 CEST 2006
Author: jjlee
Date: Sat May 6 18:57:27 2006
New Revision: 26879
Modified:
wwwsearch/mechanize/trunk/doc.html.in
wwwsearch/mechanize/trunk/mechanize/_Opener.py
wwwsearch/mechanize/trunk/mechanize/_Util.py
wwwsearch/mechanize/trunk/mechanize/__init__.py
wwwsearch/mechanize/trunk/mechanize/_mechanize.py
wwwsearch/mechanize/trunk/mechanize/_urllib2_support.py
wwwsearch/mechanize/trunk/mechanize/_useragent.py
wwwsearch/mechanize/trunk/test/test_misc.py
wwwsearch/mechanize/trunk/test/test_urllib2.py
Log:
Fix response upgrading; Add .get_data() method to responses; Extend processor mechanism so methods named .any_request() / .any_response() are called for all requests and responses (rather than just for matching URL schemes, as for e.g. .http_request() / .http_response()); Always use mechanize's own OpenerDirector
Modified: wwwsearch/mechanize/trunk/doc.html.in
==============================================================================
--- wwwsearch/mechanize/trunk/doc.html.in (original)
+++ wwwsearch/mechanize/trunk/doc.html.in Sat May 6 18:57:27 2006
@@ -349,8 +349,7 @@
<dd><p>This makes ClientCookie's response objects <code>seek()</code>able.
Seeking is done lazily (ie. the response object only reads from the socket as
necessary, rather than slurping in all the data before the response is returned
-to you). XXX only works for HTTP ATM, I think, and also doesn't work for
-HTTPError exceptions...
+to you).
<dt><code>HTTPRefererProcessor</code>
Modified: wwwsearch/mechanize/trunk/mechanize/_Opener.py
==============================================================================
--- wwwsearch/mechanize/trunk/mechanize/_Opener.py (original)
+++ wwwsearch/mechanize/trunk/mechanize/_Opener.py Sat May 6 18:57:27 2006
@@ -42,65 +42,7 @@
return names
-class OpenerMixin:
- def _request(self, url_or_req, data):
- if isstringlike(url_or_req):
- req = Request(url_or_req, data)
- else:
- # already a urllib2.Request or mechanize.Request instance
- req = url_or_req
- if data is not None:
- req.add_data(data)
- return req
-
- def retrieve(self, fullurl, filename=None, reporthook=None, data=None):
- """Returns (filename, headers).
-
- For remote objects, the default filename will refer to a temporary
- file.
-
- """
- req = self._request(fullurl, data)
- type_ = req.get_type()
- fp = self.open(req)
- headers = fp.info()
- if filename is None and type == 'file':
- return url2pathname(req.get_selector()), headers
- if filename:
- tfp = open(filename, 'wb')
- else:
- path = urlparse(fullurl)[2]
- suffix = os.path.splitext(path)[1]
- tfp = tempfile.TemporaryFile("wb", suffix=suffix)
- result = filename, headers
- bs = 1024*8
- size = -1
- read = 0
- blocknum = 1
- if reporthook:
- if headers.has_key("content-length"):
- size = int(headers["Content-Length"])
- reporthook(0, bs, size)
- while 1:
- block = fp.read(bs)
- read += len(block)
- if reporthook:
- reporthook(blocknum, bs, size)
- blocknum = blocknum + 1
- if not block:
- break
- tfp.write(block)
- fp.close()
- tfp.close()
- del fp
- del tfp
- if size>=0 and read<size:
- raise IOError("incomplete retrieval error",
- "got only %d bytes out of %d" % (read,size))
- return result
-
-
-class OpenerDirector(urllib2.OpenerDirector, OpenerMixin):
+class OpenerDirector(urllib2.OpenerDirector):
def __init__(self):
urllib2.OpenerDirector.__init__(self)
self.process_response = {}
@@ -141,32 +83,47 @@
else:
lookup[kind] = [handler]
added = True
- continue
if added:
# XXX why does self.handlers need to be sorted?
bisect.insort(self.handlers, handler)
handler.add_parent(self)
+ def _request(self, url_or_req, data):
+ if isstringlike(url_or_req):
+ req = Request(url_or_req, data)
+ else:
+ # already a urllib2.Request or mechanize.Request instance
+ req = url_or_req
+ if data is not None:
+ req.add_data(data)
+ return req
+
def open(self, fullurl, data=None):
req = self._request(fullurl, data)
- type_ = req.get_type()
+ req_scheme = req.get_type()
# pre-process request
# XXX should we allow a Processor to change the type (URL
# scheme) of the request?
- meth_name = type_+"_request"
- for processor in self.process_request.get(type_, []):
- meth = getattr(processor, meth_name)
- req = meth(req)
-
- response = urllib2.OpenerDirector.open(self, req, data)
+ for scheme in ["any", req_scheme]:
+ meth_name = scheme+"_request"
+ for processor in self.process_request.get(scheme, []):
+ meth = getattr(processor, meth_name)
+ req = meth(req)
+
+ # In Python >= 2.4, .open() supports processors already, so we must
+ # call ._open() instead.
+ urlopen = getattr(urllib2.OpenerDirector, "_open",
+ urllib2.OpenerDirector.open)
+ response = urlopen(self, req, data)
# post-process response
- meth_name = type_+"_response"
- for processor in self.process_response.get(type_, []):
- meth = getattr(processor, meth_name)
- response = meth(req, response)
+ for scheme in ["any", req_scheme]:
+ meth_name = scheme+"_response"
+ for processor in self.process_response.get(scheme, []):
+ meth = getattr(processor, meth_name)
+ response = meth(req, response)
return response
@@ -190,3 +147,49 @@
if http_err:
args = (dict, 'default', 'http_error_default') + orig_args
return apply(self._call_chain, args)
+
+ def retrieve(self, fullurl, filename=None, reporthook=None, data=None):
+ """Returns (filename, headers).
+
+ For remote objects, the default filename will refer to a temporary
+ file.
+
+ """
+ req = self._request(fullurl, data)
+ type_ = req.get_type()
+ fp = self.open(req)
+ headers = fp.info()
+ if filename is None and type == 'file':
+ return url2pathname(req.get_selector()), headers
+ if filename:
+ tfp = open(filename, 'wb')
+ else:
+ path = urlparse(fullurl)[2]
+ suffix = os.path.splitext(path)[1]
+ tfp = tempfile.TemporaryFile("wb", suffix=suffix)
+ result = filename, headers
+ bs = 1024*8
+ size = -1
+ read = 0
+ blocknum = 1
+ if reporthook:
+ if headers.has_key("content-length"):
+ size = int(headers["Content-Length"])
+ reporthook(0, bs, size)
+ while 1:
+ block = fp.read(bs)
+ read += len(block)
+ if reporthook:
+ reporthook(blocknum, bs, size)
+ blocknum = blocknum + 1
+ if not block:
+ break
+ tfp.write(block)
+ fp.close()
+ tfp.close()
+ del fp
+ del tfp
+ if size>=0 and read<size:
+ raise IOError("incomplete retrieval error",
+ "got only %d bytes out of %d" % (read,size))
+ return result
Modified: wwwsearch/mechanize/trunk/mechanize/_Util.py
==============================================================================
--- wwwsearch/mechanize/trunk/mechanize/_Util.py (original)
+++ wwwsearch/mechanize/trunk/mechanize/_Util.py Sat May 6 18:57:27 2006
@@ -411,6 +411,14 @@
cpy.__cache = self.__cache
return cpy
+ def get_data(self):
+ pos = self.__pos
+ try:
+ self.seek(0)
+ return self.read(-1)
+ finally:
+ self.__pos = pos
+
def read(self, size=-1):
pos = self.__pos
end = len(self.__cache.getvalue())
Modified: wwwsearch/mechanize/trunk/mechanize/__init__.py
==============================================================================
--- wwwsearch/mechanize/trunk/mechanize/__init__.py (original)
+++ wwwsearch/mechanize/trunk/mechanize/__init__.py Sat May 6 18:57:27 2006
@@ -20,9 +20,10 @@
from _MSIECookieJar import MSIECookieJar
from _urllib2_support import \
Request, \
- OpenerDirector, build_opener, install_opener, urlopen, \
+ build_opener, install_opener, urlopen, \
OpenerFactory, urlretrieve, BaseHandler, HeadParser, \
RobotExclusionError
+from _Opener import OpenerDirector
try:
from _urllib2_support import XHTMLCompatibleHeadParser
except ImportError:
Modified: wwwsearch/mechanize/trunk/mechanize/_mechanize.py
==============================================================================
--- wwwsearch/mechanize/trunk/mechanize/_mechanize.py (original)
+++ wwwsearch/mechanize/trunk/mechanize/_mechanize.py Sat May 6 18:57:27 2006
@@ -24,7 +24,7 @@
from _useragent import UserAgent
from _html import DefaultFactory
-from _Util import response_seek_wrapper
+from _Util import response_seek_wrapper, closeable_response
import _Request
__version__ = (0, 1, 0, "a", None) # 0.1.0a
@@ -60,13 +60,43 @@
response.close()
del self._history[:]
+# Horrible, but needed, at least until fork urllib2. Even then, may want
+# to preserve urllib2 compatibility.
+def upgrade_response(response):
+ # a urllib2 handler constructed the response, i.e. the response is an
+ # urllib.addinfourl, instead of a _Util.closeable_response as returned
+ # by e.g. mechanize.HTTPHandler
+ try:
+ code = response.code
+ except AttributeError:
+ code = None
+ try:
+ msg = response.msg
+ except AttributeError:
+ msg = None
+
+ # may have already-.read() data from .seek() cache
+ data = None
+ get_data = getattr(response, "get_data", None)
+ if get_data:
+ data = get_data()
+
+ response = closeable_response(
+ response.fp, response.info(), response.geturl(), code, msg)
+ response = response_seek_wrapper(response)
+ if data:
+ response.set_data(data)
+ return response
+class ResponseUpgradeProcessor(urllib2.BaseHandler):
+ # upgrade responses to be .close()able without becoming unusable
+ handler_order = 0 # before anything else
+ def any_response(self, request, response):
+ if not hasattr(response, 'closeable_response'):
+ response = upgrade_response(response)
+ return response
-if sys.version_info[:2] >= (2, 4):
- from _Opener import OpenerMixin
-else:
- class OpenerMixin: pass
-class Browser(UserAgent, OpenerMixin):
+class Browser(UserAgent):
"""Browser-like class with support for history, forms and links.
BrowserStateError is raised whenever the browser is in the wrong state to
@@ -81,6 +111,11 @@
"""
+ handler_classes = UserAgent.handler_classes.copy()
+ handler_classes["_response_upgrade"] = ResponseUpgradeProcessor
+ default_others = copy.copy(UserAgent.default_others)
+ default_others.append("_response_upgrade")
+
def __init__(self,
factory=None,
history=None,
@@ -193,7 +228,6 @@
def set_response(self, response):
"""Replace current response with (a copy of) response."""
- from _Util import closeable_response
# sanity check, necessary but far from sufficient
if not (hasattr(response, "info") and hasattr(response, "geturl") and
hasattr(response, "read")):
@@ -201,32 +235,13 @@
self.form = None
- # XXX bleah!!
-
- if not hasattr(response, 'closeable_response'):
- # we expect to get here if a urllib2 handler constructed the
- # response, i.e. the response is an urllib.addinfourl, instead of a
- # _Util.closeable_response as returned by
- # e.g. mechanize.HTTPHandler
- try:
- code = response.code
- except AttributeError:
- code = None
- try:
- msg = response.msg
- except AttributeError:
- msg = None
- # assume response has an .fp attribute, the socket fileobject
- # (i.e. is an urllib.addinfourl, really).
- response = closeable_response(
- response.fp, response.info(), response.geturl(), code, msg)
if not hasattr(response, "seek"):
response = response_seek_wrapper(response)
- # 0) don't want to copy here, but
- # 1) don't want to copy some of the time and not other times
- # 2) need response to be .close()able and .seek()able
- # 3) 2) and 1) imply must always be copy.copy()ed
- response = copy.copy(response)
+ if not hasattr(response, "closeable_response"):
+ response = ResponseUpgradeProcessor().any_response(
+ 'junk', response)
+ else:
+ response = copy.copy(response)
self._response = response
self._factory.set_response(self._response)
Modified: wwwsearch/mechanize/trunk/mechanize/_urllib2_support.py
==============================================================================
--- wwwsearch/mechanize/trunk/mechanize/_urllib2_support.py (original)
+++ wwwsearch/mechanize/trunk/mechanize/_urllib2_support.py Sat May 6 18:57:27 2006
@@ -14,6 +14,7 @@
import copy, time, tempfile, htmlentitydefs, re
from _ClientCookie import CookieJar, request_host
+import _Opener
from _Util import isstringlike, startswith, getheaders, closeable_response
from _HeadersUtil import is_html
from _Debug import getLogger
@@ -406,18 +407,14 @@
https_response = http_response
- # XXX ATM this only takes notice of http responses -- probably
- # should be independent of protocol scheme (http, ftp, etc.)
class SeekableProcessor(BaseHandler):
"""Make responses seekable."""
- def http_response(self, request, response):
+ def any_response(self, request, response):
if not hasattr(response, "seek"):
return response_seek_wrapper(response)
return response
- https_response = http_response
-
class HTTPCookieProcessor(BaseHandler):
"""Handle HTTP cookies.
@@ -731,14 +728,6 @@
## https_request = AbstractHTTPHandler.do_request_
- if int(10*float(urllib2.__version__[:3])) >= 24:
- # urllib2 supports processors already
- from _Opener import OpenerMixin
- class OpenerDirector(urllib2.OpenerDirector, OpenerMixin):
- pass
- else:
- from _Opener import OpenerDirector
-
class OpenerFactory:
"""This class's interface is quite likely to change."""
@@ -763,7 +752,7 @@
handlers = []
replacement_handlers = []
- def __init__(self, klass=OpenerDirector):
+ def __init__(self, klass=_Opener.OpenerDirector):
self.klass = klass
def build_opener(self, *handlers):
Modified: wwwsearch/mechanize/trunk/mechanize/_useragent.py
==============================================================================
--- wwwsearch/mechanize/trunk/mechanize/_useragent.py (original)
+++ wwwsearch/mechanize/trunk/mechanize/_useragent.py Sat May 6 18:57:27 2006
@@ -14,10 +14,10 @@
import sys
import urllib2
+from _Opener import OpenerDirector
if sys.version_info[:2] >= (2, 4):
- from urllib2 import OpenerDirector, BaseHandler, HTTPErrorProcessor
+ from urllib2 import BaseHandler, HTTPErrorProcessor
else:
- from _Opener import OpenerDirector
from _urllib2_support import BaseHandler, HTTPErrorProcessor
import _urllib2_support
Modified: wwwsearch/mechanize/trunk/test/test_misc.py
==============================================================================
--- wwwsearch/mechanize/trunk/test/test_misc.py (original)
+++ wwwsearch/mechanize/trunk/test/test_misc.py Sat May 6 18:57:27 2006
@@ -202,28 +202,12 @@
rsw.seek(0)
self._test4(rsw)
- def testSetResponseData(self):
+ def testGetResponseData(self):
from mechanize import response_seek_wrapper
r = TestUnSeekableResponse(self.text, {'blah': 'yawn'})
rsw = response_seek_wrapper(r)
- rsw.set_data("""\
-A Seeming somwhat more than View;
- That doth instruct the Mind
- In Things that ly behind,
-""")
- self.assertEqual(rsw.read(9), "A Seeming")
- self.assertEqual(rsw.read(13), " somwhat more")
- rsw.seek(0)
- self.assertEqual(rsw.read(9), "A Seeming")
- self.assertEqual(rsw.readline(), " somwhat more than View;\n")
- rsw.seek(0)
- self.assertEqual(rsw.readline(), "A Seeming somwhat more than View;\n")
- rsw.seek(-1, 1)
- self.assertEqual(rsw.read(7), "\n That")
- r = TestUnSeekableResponse(self.text, {'blah': 'yawn'})
- rsw = response_seek_wrapper(r)
- rsw.set_data(self.text)
+ self.assertEqual(rsw.get_data(), self.text)
self._test2(rsw)
rsw.seek(0)
self._test4(rsw)
Modified: wwwsearch/mechanize/trunk/test/test_urllib2.py
==============================================================================
--- wwwsearch/mechanize/trunk/test/test_urllib2.py (original)
+++ wwwsearch/mechanize/trunk/test/test_urllib2.py Sat May 6 18:57:27 2006
@@ -225,11 +225,12 @@
req = Request("http://example.com/")
r = o.open(req)
+
# processor methods are called on *all* handlers that define them,
# not just the first handler
calls = [(handlers[0], "http_request"), (handlers[1], "http_request"),
(handlers[0], "http_response"), (handlers[1], "http_response")]
-
+ self.assertEqual(len(o.calls), len(calls))
for i in range(len(o.calls)):
handler, name, args, kwds = o.calls[i]
if i < 2:
@@ -247,6 +248,47 @@
self.assert_(args[1] is None or
isinstance(args[1], MockResponse))
+ def test_any(self):
+ o = OpenerDirector()
+ meth_spec = [[
+ ("http_request", "return request"),
+ ("http_response", "return response"),
+ ("ftp_request", "return request"),
+ ("ftp_response", "return response"),
+ ("any_request", "return request"),
+ ("any_response", "return response"),
+ ]]
+ handlers = add_ordered_mock_handlers(o, meth_spec)
+ handler = handlers[0]
+
+ for scheme in ["http", "ftp"]:
+ o.calls = []
+ req = Request("%s://example.com/" % scheme)
+ r = o.open(req)
+
+ calls = [(handler, "any_request"),
+ (handler, ("%s_request" % scheme)),
+ (handler, "any_response"),
+ (handler, ("%s_response" % scheme)),
+ ]
+ #self.assertEqual(len(o.calls), len(calls))
+ for i, ((handler, name, args, kwds), calls) in (
+ enumerate(zip(o.calls, calls))):
+ if i < 2:
+ # *_request
+ self.assert_((handler, name) == calls)
+ self.assert_(len(args) == 1)
+ self.assert_(isinstance(args[0], Request))
+ else:
+ # *_response
+ self.assert_((handler, name) == calls)
+ self.assert_(len(args) == 2)
+ self.assert_(isinstance(args[0], Request))
+ # response from opener.open is None, because there's no
+ # handler that defines http_open to handle it
+ self.assert_(args[1] is None or
+ isinstance(args[1], MockResponse))
+
class MockHTTPResponse:
def __init__(self, fp, msg, status, reason):
@@ -668,7 +710,7 @@
def info(self): pass
def geturl(self): return ""
r = MockUnseekableResponse()
- newr = h.http_response(req, r)
+ newr = h.any_response(req, r)
self.assert_(not hasattr(r, "seek"))
self.assert_(hasattr(newr, "seek"))
More information about the wwwsearch-commits
mailing list