[wwwsearch-commits] r36029 - in wwwsearch/mechanize/trunk: mechanize test

jjlee at codespeak.net jjlee at codespeak.net
Thu Dec 28 22:00:26 CET 2006


Author: jjlee
Date: Thu Dec 28 22:00:25 2006
New Revision: 36029

Modified:
   wwwsearch/mechanize/trunk/mechanize/_http.py
   wwwsearch/mechanize/trunk/test/test_urllib2.py
Log:
Fix redirection to 'URIs' that contain characters that are not allowed in URIs (riko.wichmann at gmx.de)

Modified: wwwsearch/mechanize/trunk/mechanize/_http.py
==============================================================================
--- wwwsearch/mechanize/trunk/mechanize/_http.py	(original)
+++ wwwsearch/mechanize/trunk/mechanize/_http.py	Thu Dec 28 22:00:25 2006
@@ -99,6 +99,7 @@
             newurl = headers.getheaders('uri')[0]
         else:
             return
+        newurl = _rfc3986.clean_url(newurl, "latin-1")
         newurl = _rfc3986.urljoin(req.get_full_url(), newurl)
 
         # XXX Probably want to forget about the state of the current
@@ -469,8 +470,8 @@
     # e.g. Firefox 1.5 does (something like) this
     if ((url.startswith('"') and url.endswith('"')) or
         (url.startswith("'") and url.endswith("'"))):
-        return url[1:-1]
-    return url
+        url = url[1:-1]
+    return _rfc3986.clean_url(url, "latin-1")  # XXX encoding
 
 def parse_refresh_header(refresh):
     """

Modified: wwwsearch/mechanize/trunk/test/test_urllib2.py
==============================================================================
--- wwwsearch/mechanize/trunk/test/test_urllib2.py	(original)
+++ wwwsearch/mechanize/trunk/test/test_urllib2.py	Thu Dec 28 22:00:25 2006
@@ -886,6 +886,39 @@
         except mechanize.HTTPError:
             self.assert_(count == HTTPRedirectHandler.max_redirections)
 
+    def test_redirect_bad_uri(self):
+        # bad URIs should be cleaned up before redirection
+        from mechanize._response import test_html_response
+        from_url = "http://example.com/a.html"
+        bad_to_url = "http://example.com/b. |html"
+        good_to_url = "http://example.com/b.%20%7Chtml"
+
+        h = HTTPRedirectHandler()
+        o = h.parent = MockOpener()
+
+        req = Request(from_url)
+        h.http_error_302(req, test_html_response(), 302, "Blah",
+                         http_message({"location": bad_to_url}),
+                         )
+        self.assertEqual(o.req.get_full_url(), good_to_url)
+
+    def test_refresh_bad_uri(self):
+        # bad URIs should be cleaned up before redirection
+        from mechanize._response import test_html_response
+        from_url = "http://example.com/a.html"
+        bad_to_url = "http://example.com/b. |html"
+        good_to_url = "http://example.com/b.%20%7Chtml"
+
+        h = HTTPRefreshProcessor(max_time=None, honor_time=False)
+        o = h.parent = MockOpener()
+
+        req = Request("http://example.com/")
+        r = test_html_response(
+            headers=[("refresh", '0; url="%s"' % bad_to_url)])
+        newr = h.http_response(req, r)
+        headers = o.args[-1]
+        self.assertEqual(headers["Location"], good_to_url)
+
     def test_cookie_redirect(self):
         # cookies shouldn't leak into redirected requests
         import mechanize


More information about the wwwsearch-commits mailing list