[wwwsearch-commits] r30731 - in wwwsearch/mechanize/trunk: . mechanize
jjlee at codespeak.net
jjlee at codespeak.net
Sat Jul 29 17:02:21 CEST 2006
Author: jjlee
Date: Sat Jul 29 17:02:19 2006
New Revision: 30731
Modified:
wwwsearch/mechanize/trunk/mechanize/_http.py
wwwsearch/mechanize/trunk/test.py
Log:
Fix bug with quoted META Refresh URL (nilton.volpato at gmail.com); Clean up test.py a bit
Modified: wwwsearch/mechanize/trunk/mechanize/_http.py
==============================================================================
--- wwwsearch/mechanize/trunk/mechanize/_http.py (original)
+++ wwwsearch/mechanize/trunk/mechanize/_http.py Sat Jul 29 17:02:19 2006
@@ -400,6 +400,42 @@
https_request = http_request
https_response = http_response
+
+def clean_refresh_url(url):
+ # Strip one pair of matching quotes wrapped around the URL; e.g. Firefox 1.5 does (something like) this
+ if ((url.startswith('"') and url.endswith('"')) or  # "..." form
+ (url.startswith("'") and url.endswith("'"))):  # '...' form
+ return url[1:-1]  # drop the first and last character (the quotes)
+ return url  # not quoted at both ends: returned unchanged
+
+def parse_refresh_header(refresh):
+ """Parse a Refresh header value into a (pause, newurl) tuple.
+ >>> parse_refresh_header("1; url=http://example.com/")
+ (1.0, 'http://example.com/')
+ >>> parse_refresh_header("1; url='http://example.com/'")
+ (1.0, 'http://example.com/')
+ >>> parse_refresh_header("1")
+ (1.0, None)
+ >>> parse_refresh_header("blah")
+ Traceback (most recent call last):
+ ValueError: invalid literal for float(): blah
+
+ """
+
+ ii = refresh.find(";")  # index of the first ";", or -1 when there is none
+ if ii != -1:
+ pause, newurl_spec = float(refresh[:ii]), refresh[ii+1:]  # float() raises ValueError on a non-numeric pause
+ jj = newurl_spec.find("=")  # the first "=" separates the key from the URL
+ key = None  # stays None when the spec contains no "="
+ if jj != -1:
+ key, newurl = newurl_spec[:jj], newurl_spec[jj+1:]
+ newurl = clean_refresh_url(newurl)  # remove a pair of quotes around the URL, if present
+ if key is None or key.strip().lower() != "url":  # the key must be "url", ignoring case and surrounding whitespace
+ raise ValueError()  # the text after ";" is not a url=... spec
+ else:
+ pause, newurl = float(refresh), None  # bare pause with no URL part
+ return pause, newurl
+
class HTTPRefreshProcessor(BaseHandler):
"""Perform HTTP Refresh redirections.
@@ -429,18 +465,13 @@
if code == 200 and hdrs.has_key("refresh"):
refresh = hdrs.getheaders("refresh")[0]
- ii = refresh.find(";")
- if ii != -1:
- pause, newurl_spec = float(refresh[:ii]), refresh[ii+1:]
- jj = newurl_spec.find("=")
- key = None
- if jj != -1:
- key, newurl = newurl_spec[:jj], newurl_spec[jj+1:]
- if key is None or key.strip().lower() != "url":
- debug("bad Refresh header: %r" % refresh)
- return response
- else:
- pause, newurl = float(refresh), response.geturl()
+ try:
+ pause, newurl = parse_refresh_header(refresh)
+ except ValueError:
+ debug("bad Refresh header: %r" % refresh)
+ return response
+ if newurl is None:
+ newurl = response.geturl()
if (self.max_time is None) or (pause <= self.max_time):
if pause > 1E-3 and self.honor_time:
time.sleep(pause)
Modified: wwwsearch/mechanize/trunk/test.py
==============================================================================
--- wwwsearch/mechanize/trunk/test.py (original)
+++ wwwsearch/mechanize/trunk/test.py Sat Jul 29 17:02:19 2006
@@ -14,7 +14,7 @@
"test_headers", "test_urllib2", "test_pullparser",
]
-import sys, os, traceback, logging
+import sys, os, traceback, logging, glob
from unittest import defaultTestLoader, TextTestRunner, TestSuite, TestCase
level = logging.DEBUG
@@ -108,6 +108,8 @@
## __builtin__.jjl = jjl
# XXX temporary stop-gap to run doctests
+
+ # import local copy of Python 2.5 doctest
assert os.path.isdir("test")
sys.path.insert(0, "test")
# needed for recent doctest / linecache -- this is only for testing
@@ -118,29 +120,43 @@
# that renamed module.
sys.path.insert(0, "test-tools")
import doctest
+
import mechanize
+
+ # run .doctest files needing special support
common_globs = {"mechanize": mechanize}
+ pm_doctest_filename = os.path.join("test", "test_password_manager.doctest")
for globs in [
{"mgr_class": mechanize.HTTPPasswordMgr},
{"mgr_class": mechanize.HTTPProxyPasswordMgr},
]:
globs.update(common_globs)
doctest.testfile(
- os.path.join("test", "test_password_manager.doctest"),
+ pm_doctest_filename,
#os.path.join("test", "test_scratch.doctest"),
globs=globs,
)
-
- doctest.testfile(os.path.join("test", "test_rfc3986.doctest"))
- doctest.testfile(os.path.join("test", "test_request.doctest"))
- doctest.testfile(os.path.join("test", "test_history.doctest"))
- doctest.testfile(os.path.join("test", "test_html.doctest"))
- from mechanize import _headersutil, _auth, _clientcookie, _pullparser
+
+ # run .doctest files
+ special_doctests = [pm_doctest_filename,
+ os.path.join("test", "test_scratch.doctest"),
+ ]
+ doctest_files = glob.glob(os.path.join("test", "*.doctest"))
+ for dt in special_doctests:
+ doctest_files.remove(dt)
+ for df in doctest_files:
+ doctest.testfile(df)
+
+ # run doctests in docstrings
+ from mechanize import _headersutil, _auth, _clientcookie, _pullparser, \
+ _http
doctest.testmod(_headersutil)
doctest.testmod(_auth)
doctest.testmod(_clientcookie)
doctest.testmod(_pullparser)
+ doctest.testmod(_http)
+ # run vanilla unittest tests
import unittest
test_path = os.path.join(os.path.dirname(sys.argv[0]), "test")
sys.path.insert(0, test_path)
More information about the wwwsearch-commits
mailing list