[wwwsearch-commits] r48480 - in wwwsearch/mechanize/trunk: . mechanize test
jjlee at codespeak.net
jjlee at codespeak.net
Fri Nov 9 23:22:43 CET 2007
Author: jjlee
Date: Fri Nov 9 23:22:41 2007
New Revision: 48480
Added:
wwwsearch/mechanize/trunk/test/test_robotfileparser.doctest
Modified:
wwwsearch/mechanize/trunk/mechanize/_auth.py
wwwsearch/mechanize/trunk/mechanize/_clientcookie.py
wwwsearch/mechanize/trunk/mechanize/_gzip.py
wwwsearch/mechanize/trunk/mechanize/_html.py
wwwsearch/mechanize/trunk/mechanize/_http.py
wwwsearch/mechanize/trunk/mechanize/_mechanize.py
wwwsearch/mechanize/trunk/mechanize/_opener.py
wwwsearch/mechanize/trunk/mechanize/_rfc3986.py
wwwsearch/mechanize/trunk/mechanize/_useragent.py
wwwsearch/mechanize/trunk/mechanize/_util.py
wwwsearch/mechanize/trunk/test.py
wwwsearch/mechanize/trunk/test/test_html.doctest
Log:
* Fix thoroughly broken digest auth (still need functional test!) (trebor74hr at gmail.com)
* Fix issue where no more tags after <title> caused default parser to raise an exception
* Fix MechanizeRobotFileParser.set_opener()
* Fix a _gzip.py NameError (gzip support is experimental)
* Remove unused imports revealed by running pyflakes
* Run _useragent.py doctests
Modified: wwwsearch/mechanize/trunk/mechanize/_auth.py
==============================================================================
--- wwwsearch/mechanize/trunk/mechanize/_auth.py (original)
+++ wwwsearch/mechanize/trunk/mechanize/_auth.py Fri Nov 9 23:22:41 2007
@@ -11,9 +11,9 @@
"""
-import re, base64, urlparse, posixpath, md5, sha, sys, copy
+import re, base64, urlparse, posixpath, md5, os, random, sha, time, copy
-from urllib2 import BaseHandler
+from urllib2 import BaseHandler, HTTPError, parse_keqv_list, parse_http_list
from urllib import getproxies, unquote, splittype, splituser, splitpasswd, \
splitport
Modified: wwwsearch/mechanize/trunk/mechanize/_clientcookie.py
==============================================================================
--- wwwsearch/mechanize/trunk/mechanize/_clientcookie.py (original)
+++ wwwsearch/mechanize/trunk/mechanize/_clientcookie.py Fri Nov 9 23:22:41 2007
@@ -32,7 +32,7 @@
"""
-import sys, re, copy, time, struct, urllib, types, logging
+import sys, re, copy, time, urllib, types, logging
try:
import threading
_threading = threading; del threading
Modified: wwwsearch/mechanize/trunk/mechanize/_gzip.py
==============================================================================
--- wwwsearch/mechanize/trunk/mechanize/_gzip.py (original)
+++ wwwsearch/mechanize/trunk/mechanize/_gzip.py Fri Nov 9 23:22:41 2007
@@ -79,7 +79,7 @@
def readline(self, size=-1):
return self.__data.readline(size)
def readlines(self, sizehint=-1):
- return self.__data.readlines(size)
+ return self.__data.readlines(sizehint)
def __getattr__(self, name):
# delegate unknown methods/attributes
Modified: wwwsearch/mechanize/trunk/mechanize/_html.py
==============================================================================
--- wwwsearch/mechanize/trunk/mechanize/_html.py (original)
+++ wwwsearch/mechanize/trunk/mechanize/_html.py Fri Nov 9 23:22:41 2007
@@ -9,7 +9,7 @@
"""
import re, copy, htmlentitydefs
-import sgmllib, HTMLParser, ClientForm
+import sgmllib, ClientForm
import _request
from _headersutil import split_header_words, is_html as _is_html
@@ -238,12 +238,13 @@
self._encoding = encoding
def _get_title_text(self, parser):
+ import _pullparser
text = []
tok = None
while 1:
try:
tok = parser.get_token()
- except NoMoreTokensError:
+ except _pullparser.NoMoreTokensError:
break
if tok.type == "data":
text.append(str(tok))
@@ -321,7 +322,6 @@
_beautifulsoup.BeautifulSoup, _beautifulsoup.ICantBelieveItsBeautifulSoup
)
# monkeypatch sgmllib to fix http://www.python.org/sf/803422 :-(
-import sgmllib
sgmllib.charref = re.compile("&#(x?[0-9a-fA-F]+)[^0-9a-fA-F]")
class MechanizeBs(_beautifulsoup.BeautifulSoup):
@@ -361,7 +361,6 @@
link_class=Link,
urltags=None,
):
- import _beautifulsoup
if link_parser_class is None:
link_parser_class = MechanizeBs
self.link_parser_class = link_parser_class
@@ -419,7 +418,6 @@
class RobustFormsFactory(FormsFactory):
def __init__(self, *args, **kwds):
- import ClientForm
args = form_parser_args(*args, **kwds)
if args.form_parser_class is None:
args.form_parser_class = RobustFormParser
@@ -622,7 +620,6 @@
self._soup_class = soup_class
def set_response(self, response):
- import _beautifulsoup
Factory.set_response(self, response)
if response is not None:
data = response.read()
Modified: wwwsearch/mechanize/trunk/mechanize/_http.py
==============================================================================
--- wwwsearch/mechanize/trunk/mechanize/_http.py (original)
+++ wwwsearch/mechanize/trunk/mechanize/_http.py Fri Nov 9 23:22:41 2007
@@ -12,17 +12,16 @@
"""
-import copy, time, tempfile, htmlentitydefs, re, logging, socket, \
+import time, htmlentitydefs, logging, socket, \
urllib2, urllib, httplib, sgmllib
from urllib2 import URLError, HTTPError, BaseHandler
from cStringIO import StringIO
from _request import Request
-from _util import isstringlike
from _response import closeable_response, response_seek_wrapper
from _html import unescape, unescape_charref
from _headersutil import is_html
-from _clientcookie import CookieJar, request_host
+from _clientcookie import CookieJar
import _rfc3986
debug = logging.getLogger("mechanize").debug
@@ -350,11 +349,11 @@
class MechanizeRobotFileParser(robotparser.RobotFileParser):
def __init__(self, url='', opener=None):
- import _opener
robotparser.RobotFileParser.__init__(self, url)
self._opener = opener
def set_opener(self, opener=None):
+ import _opener
if opener is None:
opener = _opener.OpenerDirector()
self._opener = opener
Modified: wwwsearch/mechanize/trunk/mechanize/_mechanize.py
==============================================================================
--- wwwsearch/mechanize/trunk/mechanize/_mechanize.py (original)
+++ wwwsearch/mechanize/trunk/mechanize/_mechanize.py Fri Nov 9 23:22:41 2007
@@ -9,7 +9,7 @@
"""
-import urllib2, sys, copy, re, os, urllib
+import urllib2, copy, re, os, urllib
from _useragent import UserAgentBase
Modified: wwwsearch/mechanize/trunk/mechanize/_opener.py
==============================================================================
--- wwwsearch/mechanize/trunk/mechanize/_opener.py (original)
+++ wwwsearch/mechanize/trunk/mechanize/_opener.py Fri Nov 9 23:22:41 2007
@@ -9,7 +9,7 @@
"""
-import os, urllib2, bisect, urllib, httplib, types, tempfile
+import os, urllib2, bisect, httplib, types, tempfile
try:
import threading as _threading
except ImportError:
Modified: wwwsearch/mechanize/trunk/mechanize/_rfc3986.py
==============================================================================
--- wwwsearch/mechanize/trunk/mechanize/_rfc3986.py (original)
+++ wwwsearch/mechanize/trunk/mechanize/_rfc3986.py Fri Nov 9 23:22:41 2007
@@ -12,7 +12,7 @@
# XXX Wow, this is ugly. Overly-direct translation of the RFC ATM.
-import sys, re, posixpath, urllib
+import re, urllib
## def chr_range(a, b):
## return "".join(map(chr, range(ord(a), ord(b)+1)))
@@ -96,6 +96,7 @@
# oops, this doesn't do the same thing as the literal translation
# from the RFC below
+## import posixpath
## def urljoin_parts(base_parts, reference_parts):
## scheme, authority, path, query, fragment = base_parts
## rscheme, rauthority, rpath, rquery, rfragment = reference_parts
Modified: wwwsearch/mechanize/trunk/mechanize/_useragent.py
==============================================================================
--- wwwsearch/mechanize/trunk/mechanize/_useragent.py (original)
+++ wwwsearch/mechanize/trunk/mechanize/_useragent.py Fri Nov 9 23:22:41 2007
@@ -11,7 +11,7 @@
"""
-import sys, warnings, urllib2
+import warnings
import _opener
import _urllib2
Modified: wwwsearch/mechanize/trunk/mechanize/_util.py
==============================================================================
--- wwwsearch/mechanize/trunk/mechanize/_util.py (original)
+++ wwwsearch/mechanize/trunk/mechanize/_util.py Fri Nov 9 23:22:41 2007
@@ -8,7 +8,7 @@
"""
-import re, string, time, warnings
+import re, time, warnings
def deprecation(message):
warnings.warn(message, DeprecationWarning, stacklevel=3)
Modified: wwwsearch/mechanize/trunk/test.py
==============================================================================
--- wwwsearch/mechanize/trunk/test.py (original)
+++ wwwsearch/mechanize/trunk/test.py Fri Nov 9 23:22:41 2007
@@ -87,10 +87,19 @@
#os.path.join("test", "test_scratch.doctest"),
globs=globs,
)
+ try:
+ import robotparser
+ except ImportError:
+ pass
+ else:
+ doctest.testfile(os.path.join("test",
+ "test_robotfileparser.doctest"))
# run .doctest files
special_doctests = [pm_doctest_filename,
os.path.join("test", "test_scratch.doctest"),
+ os.path.join("test",
+ "test_robotfileparser.doctest"),
]
doctest_files = glob.glob(os.path.join("test", "*.doctest"))
@@ -102,13 +111,14 @@
# run doctests in docstrings
from mechanize import _headersutil, _auth, _clientcookie, _pullparser, \
- _http, _rfc3986
+ _http, _rfc3986, _useragent
doctest.testmod(_headersutil)
doctest.testmod(_rfc3986)
doctest.testmod(_auth)
doctest.testmod(_clientcookie)
doctest.testmod(_pullparser)
doctest.testmod(_http)
+ doctest.testmod(_useragent)
if run_unittests:
# run vanilla unittest tests
Modified: wwwsearch/mechanize/trunk/test/test_html.doctest
==============================================================================
--- wwwsearch/mechanize/trunk/test/test_html.doctest (original)
+++ wwwsearch/mechanize/trunk/test/test_html.doctest Fri Nov 9 23:22:41 2007
@@ -251,3 +251,12 @@
'Ti<script type="text/strange">alert("this is valid HTML -- yuck!")</script> tle &&'
>>> get_title_sgmllib(html)
'Ti<script type="text/strange">alert("this is valid HTML -- yuck!")</script> tle &&'
+
+
+No more tags after <title> used to cause an exception
+
+>>> html = ("""\
+... <html><head>
+... <title>""")
+>>> get_title_sgmllib(html)
+''
Added: wwwsearch/mechanize/trunk/test/test_robotfileparser.doctest
==============================================================================
--- (empty file)
+++ wwwsearch/mechanize/trunk/test/test_robotfileparser.doctest Fri Nov 9 23:22:41 2007
@@ -0,0 +1,8 @@
+>>> from mechanize._http import MechanizeRobotFileParser
+
+Calling .set_opener() without args sets a default opener.
+
+>>> rfp = MechanizeRobotFileParser()
+>>> rfp.set_opener()
+>>> rfp._opener # doctest: +ELLIPSIS
+<mechanize._opener.OpenerDirector instance at ...>
More information about the wwwsearch-commits
mailing list