[wwwsearch-commits] r36082 - in wwwsearch/mechanize/trunk: . mechanize
jjlee at codespeak.net
jjlee at codespeak.net
Sun Dec 31 20:34:28 CET 2006
Author: jjlee
Date: Sun Dec 31 20:34:26 2006
New Revision: 36082
Modified:
wwwsearch/mechanize/trunk/functional_tests.py
wwwsearch/mechanize/trunk/mechanize/_mechanize.py
wwwsearch/mechanize/trunk/mechanize/_response.py
Log:
More truncation bugs that show up with .back() usage if you don't .read() the whole response before navigating to the next page. First, the .read_complete flag was broken: after .close() followed by e.g. .read(), .read_complete would be set to true, because eoffile always claims to be at end-of-file. Fixed by preventing read_complete from changing after .close() has been called. Second, .back() was returning the old response, not the .reload()ed one. Third, .back() was not returning a copy of the response, which presumably would cause seek position problems.
Modified: wwwsearch/mechanize/trunk/functional_tests.py
==============================================================================
--- wwwsearch/mechanize/trunk/functional_tests.py (original)
+++ wwwsearch/mechanize/trunk/functional_tests.py Sun Dec 31 20:34:26 2006
@@ -254,10 +254,20 @@
def test_reload_read_incomplete(self):
from mechanize import Browser
browser = Browser()
- browser.open("http://plone.org")
- browser.open("http://plone.org/products")
- browser.back()
- browser.follow_link(text="About")
+ r1 = browser.open(
+ "http://wwwsearch.sf.net/bits/mechanize_reload_test.html")
+ # if we don't do anything and go straight to another page, most of the
+ # last page's response won't be .read()...
+ r2 = browser.open("http://wwwsearch.sf.net/mechanize")
+ self.assert_(len(r1.get_data()) < 4097) # we only .read() a little bit
+ # ...so if we then go back, .follow_link() for a link near the end (a
+ # few kb in, past the point that always gets read in HTML files because
+ # of HEAD parsing) will only work if it causes a .reload()...
+ r3 = browser.back()
+ browser.follow_link(text="near the end")
+ # ... good, no LinkNotFoundError, so we did reload.
+ # we have .read() the whole file
+ self.assertEqual(len(r3._seek_wrapper__cache.getvalue()), 4202)
## def test_cacheftp(self):
## from urllib2 import CacheFTPHandler, build_opener
Modified: wwwsearch/mechanize/trunk/mechanize/_mechanize.py
==============================================================================
--- wwwsearch/mechanize/trunk/mechanize/_mechanize.py (original)
+++ wwwsearch/mechanize/trunk/mechanize/_mechanize.py Sun Dec 31 20:34:26 2006
@@ -300,8 +300,8 @@
self.request, response = self._history.back(n, self._response)
self.set_response(response)
if not response.read_complete:
- self.reload()
- return response
+ return self.reload()
+ return copy.copy(response)
def clear_history(self):
self._history.clear()
Modified: wwwsearch/mechanize/trunk/mechanize/_response.py
==============================================================================
--- wwwsearch/mechanize/trunk/mechanize/_response.py (original)
+++ wwwsearch/mechanize/trunk/mechanize/_response.py Sun Dec 31 20:34:26 2006
@@ -43,7 +43,10 @@
readlines method is always supported. xreadlines and iteration are
supported only for Python 2.2 and above.
- Public attribute: wrapped (the wrapped file object).
+ Public attributes:
+
+ wrapped: the wrapped file object
+ is_closed: true iff .close() has been called
WARNING: All other attributes of the wrapped object (ie. those that are not
one of wrapped, read, readline, readlines, xreadlines, __iter__ and next)
@@ -60,6 +63,7 @@
def __init__(self, wrapped):
self.wrapped = wrapped
self.__read_complete_state = [False]
+ self.__is_closed_state = [False]
self.__have_readline = hasattr(self.wrapped, "readline")
self.__cache = StringIO()
self.__pos = 0 # seek position
@@ -69,8 +73,14 @@
# wrapped file.
return self.wrapped.tell() == len(self.__cache.getvalue())
+ def close(self):
+ self.wrapped.close()
+ self.is_closed = True
+
def __getattr__(self, name):
- if name == "read_complete":
+ if name == "is_closed":
+ return self.__is_closed_state[0]
+ elif name == "read_complete":
return self.__read_complete_state[0]
wrapped = self.__dict__.get("wrapped")
@@ -80,8 +90,11 @@
return getattr(self.__class__, name)
def __setattr__(self, name, value):
- if name == "read_complete":
- self.__read_complete_state[0] = bool(value)
+ if name == "is_closed":
+ self.__is_closed_state[0] = bool(value)
+ elif name == "read_complete":
+ if not self.is_closed:
+ self.__read_complete_state[0] = bool(value)
else:
self.__dict__[name] = value
@@ -138,6 +151,7 @@
cpy = self.__class__(self.wrapped)
cpy.__cache = self.__cache
cpy.__read_complete_state = self.__read_complete_state
+ cpy.__is_closed_state = self.__is_closed_state
return cpy
def get_data(self):
@@ -222,8 +236,9 @@
xreadlines = __iter__
def __repr__(self):
- return ("<%s at %s whose wrapped object = %r>" %
- (self.__class__.__name__, hex(id(self)), self.wrapped))
+ return ("<%s at %s (%d) whose wrapped object = %r>" %
+ (self.__class__.__name__, hex(id(self)), self.__pos,
+ self.wrapped))
class response_seek_wrapper(seek_wrapper):
More information about the wwwsearch-commits
mailing list