[wwwsearch-commits] r30731 - in wwwsearch/mechanize/trunk: . mechanize

jjlee at codespeak.net jjlee at codespeak.net
Sat Jul 29 17:02:21 CEST 2006


Author: jjlee
Date: Sat Jul 29 17:02:19 2006
New Revision: 30731

Modified:
   wwwsearch/mechanize/trunk/mechanize/_http.py
   wwwsearch/mechanize/trunk/test.py
Log:
Fix bug with quoted META Refresh URL (nilton.volpato at gmail.com); Clean up test.py a bit

Modified: wwwsearch/mechanize/trunk/mechanize/_http.py
==============================================================================
--- wwwsearch/mechanize/trunk/mechanize/_http.py	(original)
+++ wwwsearch/mechanize/trunk/mechanize/_http.py	Sat Jul 29 17:02:19 2006
@@ -400,6 +400,42 @@
     https_request = http_request
     https_response = http_response
 
+
+def clean_refresh_url(url):
+    # e.g. Firefox 1.5 does (something like) this
+    if ((url.startswith('"') and url.endswith('"')) or
+        (url.startswith("'") and url.endswith("'"))):
+        return url[1:-1]
+    return url
+
+def parse_refresh_header(refresh):
+    """
+    >>> parse_refresh_header("1; url=http://example.com/")
+    (1.0, 'http://example.com/')
+    >>> parse_refresh_header("1; url='http://example.com/'")
+    (1.0, 'http://example.com/')
+    >>> parse_refresh_header("1")
+    (1.0, None)
+    >>> parse_refresh_header("blah")
+    Traceback (most recent call last):
+    ValueError: invalid literal for float(): blah
+
+    """
+
+    ii = refresh.find(";")
+    if ii != -1:
+        pause, newurl_spec = float(refresh[:ii]), refresh[ii+1:]
+        jj = newurl_spec.find("=")
+        key = None
+        if jj != -1:
+            key, newurl = newurl_spec[:jj], newurl_spec[jj+1:]
+            newurl = clean_refresh_url(newurl)
+        if key is None or key.strip().lower() != "url":
+            raise ValueError()
+    else:
+        pause, newurl = float(refresh), None
+    return pause, newurl
+
 class HTTPRefreshProcessor(BaseHandler):
     """Perform HTTP Refresh redirections.
 
@@ -429,18 +465,13 @@
 
         if code == 200 and hdrs.has_key("refresh"):
             refresh = hdrs.getheaders("refresh")[0]
-            ii = refresh.find(";")
-            if ii != -1:
-                pause, newurl_spec = float(refresh[:ii]), refresh[ii+1:]
-                jj = newurl_spec.find("=")
-                key = None
-                if jj != -1:
-                    key, newurl = newurl_spec[:jj], newurl_spec[jj+1:]
-                if key is None or key.strip().lower() != "url":
-                    debug("bad Refresh header: %r" % refresh)
-                    return response
-            else:
-                pause, newurl = float(refresh), response.geturl()
+            try:
+                pause, newurl = parse_refresh_header(refresh)
+            except ValueError:
+                debug("bad Refresh header: %r" % refresh)
+                return response
+            if newurl is None:
+                newurl = response.geturl()
             if (self.max_time is None) or (pause <= self.max_time):
                 if pause > 1E-3 and self.honor_time:
                     time.sleep(pause)

Modified: wwwsearch/mechanize/trunk/test.py
==============================================================================
--- wwwsearch/mechanize/trunk/test.py	(original)
+++ wwwsearch/mechanize/trunk/test.py	Sat Jul 29 17:02:19 2006
@@ -14,7 +14,7 @@
                 "test_headers", "test_urllib2", "test_pullparser",
                 ]
 
-import sys, os, traceback, logging
+import sys, os, traceback, logging, glob
 from unittest import defaultTestLoader, TextTestRunner, TestSuite, TestCase
 
 level = logging.DEBUG
@@ -108,6 +108,8 @@
 ##     __builtin__.jjl = jjl
 
     # XXX temporary stop-gap to run doctests
+
+    # import local copy of Python 2.5 doctest
     assert os.path.isdir("test")
     sys.path.insert(0, "test")
     # needed for recent doctest / linecache -- this is only for testing
@@ -118,29 +120,43 @@
     # that renamed module.
     sys.path.insert(0, "test-tools")
     import doctest
+
     import mechanize
+
+    # run .doctest files needing special support
     common_globs = {"mechanize": mechanize}
+    pm_doctest_filename = os.path.join("test", "test_password_manager.doctest")
     for globs in [
         {"mgr_class": mechanize.HTTPPasswordMgr},
         {"mgr_class": mechanize.HTTPProxyPasswordMgr},
         ]:
         globs.update(common_globs)
         doctest.testfile(
-            os.path.join("test", "test_password_manager.doctest"),
+            pm_doctest_filename,
             #os.path.join("test", "test_scratch.doctest"),
             globs=globs,
             )
-    
-    doctest.testfile(os.path.join("test", "test_rfc3986.doctest"))
-    doctest.testfile(os.path.join("test", "test_request.doctest"))
-    doctest.testfile(os.path.join("test", "test_history.doctest"))
-    doctest.testfile(os.path.join("test", "test_html.doctest"))
-    from mechanize import _headersutil, _auth, _clientcookie, _pullparser
+
+    # run .doctest files
+    special_doctests = [pm_doctest_filename,
+                        os.path.join("test", "test_scratch.doctest"),
+                        ]
+    doctest_files = glob.glob(os.path.join("test", "*.doctest"))
+    for dt in special_doctests:
+        doctest_files.remove(dt)
+    for df in doctest_files:
+        doctest.testfile(df)
+
+    # run doctests in docstrings
+    from mechanize import _headersutil, _auth, _clientcookie, _pullparser, \
+         _http
     doctest.testmod(_headersutil)
     doctest.testmod(_auth)
     doctest.testmod(_clientcookie)
     doctest.testmod(_pullparser)
+    doctest.testmod(_http)
 
+    # run vanilla unittest tests
     import unittest
     test_path = os.path.join(os.path.dirname(sys.argv[0]), "test")
     sys.path.insert(0, test_path)


More information about the wwwsearch-commits mailing list