[wwwsearch-commits] r36082 - in wwwsearch/mechanize/trunk: . mechanize

jjlee at codespeak.net jjlee at codespeak.net
Sun Dec 31 20:34:28 CET 2006


Author: jjlee
Date: Sun Dec 31 20:34:26 2006
New Revision: 36082

Modified:
   wwwsearch/mechanize/trunk/functional_tests.py
   wwwsearch/mechanize/trunk/mechanize/_mechanize.py
   wwwsearch/mechanize/trunk/mechanize/_response.py
Log:
More truncation bugs that show up with .back() usage if you don't .read() the whole response before navigating to the next page. First, the .read_complete flag was broken: after .close() followed by e.g. .read(), .read_complete would be set true, since eoffile always claims to be at end-of-file. Fixed by preventing read_complete from changing after .close(). Second, .back() was returning the old response, not the .reload()ed one. Also, .back() was not returning a copy of the response, which presumably would cause seek position problems.

Modified: wwwsearch/mechanize/trunk/functional_tests.py
==============================================================================
--- wwwsearch/mechanize/trunk/functional_tests.py	(original)
+++ wwwsearch/mechanize/trunk/functional_tests.py	Sun Dec 31 20:34:26 2006
@@ -254,10 +254,20 @@
     def test_reload_read_incomplete(self):
         from mechanize import Browser
         browser = Browser()
-        browser.open("http://plone.org")
-        browser.open("http://plone.org/products")
-        browser.back()
-        browser.follow_link(text="About")
+        r1 = browser.open(
+            "http://wwwsearch.sf.net/bits/mechanize_reload_test.html")
+        # if we don't do anything and go straight to another page, most of the
+        # last page's response won't be .read()...
+        r2 = browser.open("http://wwwsearch.sf.net/mechanize")
+        self.assert_(len(r1.get_data()) < 4097)  # we only .read() a little bit
+        # ...so if we then go back, .follow_link() for a link near the end (a
+        # few kb in, past the point that always gets read in HTML files because
+        # of HEAD parsing) will only work if it causes a .reload()...
+        r3 = browser.back()
+        browser.follow_link(text="near the end")
+        # ... good, no LinkNotFoundError, so we did reload.
+        # we have .read() the whole file
+        self.assertEqual(len(r3._seek_wrapper__cache.getvalue()), 4202)
 
 ##     def test_cacheftp(self):
 ##         from urllib2 import CacheFTPHandler, build_opener

Modified: wwwsearch/mechanize/trunk/mechanize/_mechanize.py
==============================================================================
--- wwwsearch/mechanize/trunk/mechanize/_mechanize.py	(original)
+++ wwwsearch/mechanize/trunk/mechanize/_mechanize.py	Sun Dec 31 20:34:26 2006
@@ -300,8 +300,8 @@
         self.request, response = self._history.back(n, self._response)
         self.set_response(response)
         if not response.read_complete:
-            self.reload()
-        return response
+            return self.reload()
+        return copy.copy(response)
 
     def clear_history(self):
         self._history.clear()

Modified: wwwsearch/mechanize/trunk/mechanize/_response.py
==============================================================================
--- wwwsearch/mechanize/trunk/mechanize/_response.py	(original)
+++ wwwsearch/mechanize/trunk/mechanize/_response.py	Sun Dec 31 20:34:26 2006
@@ -43,7 +43,10 @@
     readlines method is always supported.  xreadlines and iteration are
     supported only for Python 2.2 and above.
 
-    Public attribute: wrapped (the wrapped file object).
+    Public attributes:
+
+    wrapped: the wrapped file object
+    is_closed: true iff .close() has been called
 
     WARNING: All other attributes of the wrapped object (ie. those that are not
     one of wrapped, read, readline, readlines, xreadlines, __iter__ and next)
@@ -60,6 +63,7 @@
     def __init__(self, wrapped):
         self.wrapped = wrapped
         self.__read_complete_state = [False]
+        self.__is_closed_state = [False]
         self.__have_readline = hasattr(self.wrapped, "readline")
         self.__cache = StringIO()
         self.__pos = 0  # seek position
@@ -69,8 +73,14 @@
         # wrapped file.
         return self.wrapped.tell() == len(self.__cache.getvalue())
 
+    def close(self):
+        self.wrapped.close()
+        self.is_closed = True
+
     def __getattr__(self, name):
-        if name == "read_complete":
+        if name == "is_closed":
+            return self.__is_closed_state[0]
+        elif name == "read_complete":
             return self.__read_complete_state[0]
 
         wrapped = self.__dict__.get("wrapped")
@@ -80,8 +90,11 @@
         return getattr(self.__class__, name)
 
     def __setattr__(self, name, value):
-        if name == "read_complete":
-            self.__read_complete_state[0] = bool(value)
+        if name == "is_closed":
+            self.__is_closed_state[0] = bool(value)
+        elif name == "read_complete":
+            if not self.is_closed:
+                self.__read_complete_state[0] = bool(value)
         else:
             self.__dict__[name] = value
 
@@ -138,6 +151,7 @@
         cpy = self.__class__(self.wrapped)
         cpy.__cache = self.__cache
         cpy.__read_complete_state = self.__read_complete_state
+        cpy.__is_closed_state = self.__is_closed_state
         return cpy
 
     def get_data(self):
@@ -222,8 +236,9 @@
     xreadlines = __iter__
 
     def __repr__(self):
-        return ("<%s at %s whose wrapped object = %r>" %
-                (self.__class__.__name__, hex(id(self)), self.wrapped))
+        return ("<%s at %s (%d) whose wrapped object = %r>" %
+                (self.__class__.__name__, hex(id(self)), self.__pos,
+                 self.wrapped))
 
 
 class response_seek_wrapper(seek_wrapper):


More information about the wwwsearch-commits mailing list