[wwwsearch-commits] r48480 - in wwwsearch/mechanize/trunk: . mechanize test

jjlee at codespeak.net jjlee at codespeak.net
Fri Nov 9 23:22:43 CET 2007


Author: jjlee
Date: Fri Nov  9 23:22:41 2007
New Revision: 48480

Added:
   wwwsearch/mechanize/trunk/test/test_robotfileparser.doctest
Modified:
   wwwsearch/mechanize/trunk/mechanize/_auth.py
   wwwsearch/mechanize/trunk/mechanize/_clientcookie.py
   wwwsearch/mechanize/trunk/mechanize/_gzip.py
   wwwsearch/mechanize/trunk/mechanize/_html.py
   wwwsearch/mechanize/trunk/mechanize/_http.py
   wwwsearch/mechanize/trunk/mechanize/_mechanize.py
   wwwsearch/mechanize/trunk/mechanize/_opener.py
   wwwsearch/mechanize/trunk/mechanize/_rfc3986.py
   wwwsearch/mechanize/trunk/mechanize/_useragent.py
   wwwsearch/mechanize/trunk/mechanize/_util.py
   wwwsearch/mechanize/trunk/test.py
   wwwsearch/mechanize/trunk/test/test_html.doctest
Log:
 * Fix thoroughly broken digest auth (still need functional test!) (trebor74hr at gmail.com)
 * Fix issue where having no more tags after <title> caused the default parser to raise an exception
 * Fix MechanizeRobotFileParser.set_opener()
 * Fix a _gzip.py NameError (gzip support is experimental)
 * Remove unused imports revealed by running pyflakes
 * Run _useragent.py doctests


Modified: wwwsearch/mechanize/trunk/mechanize/_auth.py
==============================================================================
--- wwwsearch/mechanize/trunk/mechanize/_auth.py	(original)
+++ wwwsearch/mechanize/trunk/mechanize/_auth.py	Fri Nov  9 23:22:41 2007
@@ -11,9 +11,9 @@
 
 """
 
-import re, base64, urlparse, posixpath, md5, sha, sys, copy
+import re, base64, urlparse, posixpath, md5, os, random, sha, time, copy
 
-from urllib2 import BaseHandler
+from urllib2 import BaseHandler, HTTPError, parse_keqv_list, parse_http_list
 from urllib import getproxies, unquote, splittype, splituser, splitpasswd, \
      splitport
 

Modified: wwwsearch/mechanize/trunk/mechanize/_clientcookie.py
==============================================================================
--- wwwsearch/mechanize/trunk/mechanize/_clientcookie.py	(original)
+++ wwwsearch/mechanize/trunk/mechanize/_clientcookie.py	Fri Nov  9 23:22:41 2007
@@ -32,7 +32,7 @@
 
 """
 
-import sys, re, copy, time, struct, urllib, types, logging
+import sys, re, copy, time, urllib, types, logging
 try:
     import threading
     _threading = threading; del threading

Modified: wwwsearch/mechanize/trunk/mechanize/_gzip.py
==============================================================================
--- wwwsearch/mechanize/trunk/mechanize/_gzip.py	(original)
+++ wwwsearch/mechanize/trunk/mechanize/_gzip.py	Fri Nov  9 23:22:41 2007
@@ -79,7 +79,7 @@
     def readline(self, size=-1):
         return self.__data.readline(size)
     def readlines(self, sizehint=-1):
-        return self.__data.readlines(size)
+        return self.__data.readlines(sizehint)
 
     def __getattr__(self, name):
         # delegate unknown methods/attributes

Modified: wwwsearch/mechanize/trunk/mechanize/_html.py
==============================================================================
--- wwwsearch/mechanize/trunk/mechanize/_html.py	(original)
+++ wwwsearch/mechanize/trunk/mechanize/_html.py	Fri Nov  9 23:22:41 2007
@@ -9,7 +9,7 @@
 """
 
 import re, copy, htmlentitydefs
-import sgmllib, HTMLParser, ClientForm
+import sgmllib, ClientForm
 
 import _request
 from _headersutil import split_header_words, is_html as _is_html
@@ -238,12 +238,13 @@
         self._encoding = encoding
 
     def _get_title_text(self, parser):
+        import _pullparser
         text = []
         tok = None
         while 1:
             try:
                 tok = parser.get_token()
-            except NoMoreTokensError:
+            except _pullparser.NoMoreTokensError:
                 break
             if tok.type == "data":
                 text.append(str(tok))
@@ -321,7 +322,6 @@
     _beautifulsoup.BeautifulSoup, _beautifulsoup.ICantBelieveItsBeautifulSoup
     )
 # monkeypatch sgmllib to fix http://www.python.org/sf/803422 :-(
-import sgmllib
 sgmllib.charref = re.compile("&#(x?[0-9a-fA-F]+)[^0-9a-fA-F]")
 
 class MechanizeBs(_beautifulsoup.BeautifulSoup):
@@ -361,7 +361,6 @@
                  link_class=Link,
                  urltags=None,
                  ):
-        import _beautifulsoup
         if link_parser_class is None:
             link_parser_class = MechanizeBs
         self.link_parser_class = link_parser_class
@@ -419,7 +418,6 @@
 
 class RobustFormsFactory(FormsFactory):
     def __init__(self, *args, **kwds):
-        import ClientForm
         args = form_parser_args(*args, **kwds)
         if args.form_parser_class is None:
             args.form_parser_class = RobustFormParser
@@ -622,7 +620,6 @@
         self._soup_class = soup_class
 
     def set_response(self, response):
-        import _beautifulsoup
         Factory.set_response(self, response)
         if response is not None:
             data = response.read()

Modified: wwwsearch/mechanize/trunk/mechanize/_http.py
==============================================================================
--- wwwsearch/mechanize/trunk/mechanize/_http.py	(original)
+++ wwwsearch/mechanize/trunk/mechanize/_http.py	Fri Nov  9 23:22:41 2007
@@ -12,17 +12,16 @@
 
 """
 
-import copy, time, tempfile, htmlentitydefs, re, logging, socket, \
+import time, htmlentitydefs, logging, socket, \
        urllib2, urllib, httplib, sgmllib
 from urllib2 import URLError, HTTPError, BaseHandler
 from cStringIO import StringIO
 
 from _request import Request
-from _util import isstringlike
 from _response import closeable_response, response_seek_wrapper
 from _html import unescape, unescape_charref
 from _headersutil import is_html
-from _clientcookie import CookieJar, request_host
+from _clientcookie import CookieJar
 import _rfc3986
 
 debug = logging.getLogger("mechanize").debug
@@ -350,11 +349,11 @@
     class MechanizeRobotFileParser(robotparser.RobotFileParser):
 
         def __init__(self, url='', opener=None):
-            import _opener
             robotparser.RobotFileParser.__init__(self, url)
             self._opener = opener
 
         def set_opener(self, opener=None):
+            import _opener
             if opener is None:
                 opener = _opener.OpenerDirector()
             self._opener = opener

Modified: wwwsearch/mechanize/trunk/mechanize/_mechanize.py
==============================================================================
--- wwwsearch/mechanize/trunk/mechanize/_mechanize.py	(original)
+++ wwwsearch/mechanize/trunk/mechanize/_mechanize.py	Fri Nov  9 23:22:41 2007
@@ -9,7 +9,7 @@
 
 """
 
-import urllib2, sys, copy, re, os, urllib
+import urllib2, copy, re, os, urllib
 
 
 from _useragent import UserAgentBase

Modified: wwwsearch/mechanize/trunk/mechanize/_opener.py
==============================================================================
--- wwwsearch/mechanize/trunk/mechanize/_opener.py	(original)
+++ wwwsearch/mechanize/trunk/mechanize/_opener.py	Fri Nov  9 23:22:41 2007
@@ -9,7 +9,7 @@
 
 """
 
-import os, urllib2, bisect, urllib, httplib, types, tempfile
+import os, urllib2, bisect, httplib, types, tempfile
 try:
     import threading as _threading
 except ImportError:

Modified: wwwsearch/mechanize/trunk/mechanize/_rfc3986.py
==============================================================================
--- wwwsearch/mechanize/trunk/mechanize/_rfc3986.py	(original)
+++ wwwsearch/mechanize/trunk/mechanize/_rfc3986.py	Fri Nov  9 23:22:41 2007
@@ -12,7 +12,7 @@
 
 # XXX Wow, this is ugly.  Overly-direct translation of the RFC ATM.
 
-import sys, re, posixpath, urllib
+import re, urllib
 
 ## def chr_range(a, b):
 ##     return "".join(map(chr, range(ord(a), ord(b)+1)))
@@ -96,6 +96,7 @@
 
 # oops, this doesn't do the same thing as the literal translation
 # from the RFC below
+## import posixpath
 ## def urljoin_parts(base_parts, reference_parts):
 ##     scheme, authority, path, query, fragment = base_parts
 ##     rscheme, rauthority, rpath, rquery, rfragment = reference_parts

Modified: wwwsearch/mechanize/trunk/mechanize/_useragent.py
==============================================================================
--- wwwsearch/mechanize/trunk/mechanize/_useragent.py	(original)
+++ wwwsearch/mechanize/trunk/mechanize/_useragent.py	Fri Nov  9 23:22:41 2007
@@ -11,7 +11,7 @@
 
 """
 
-import sys, warnings, urllib2
+import warnings
 
 import _opener
 import _urllib2

Modified: wwwsearch/mechanize/trunk/mechanize/_util.py
==============================================================================
--- wwwsearch/mechanize/trunk/mechanize/_util.py	(original)
+++ wwwsearch/mechanize/trunk/mechanize/_util.py	Fri Nov  9 23:22:41 2007
@@ -8,7 +8,7 @@
 
 """
 
-import re, string, time, warnings
+import re, time, warnings
 
 def deprecation(message):
     warnings.warn(message, DeprecationWarning, stacklevel=3)

Modified: wwwsearch/mechanize/trunk/test.py
==============================================================================
--- wwwsearch/mechanize/trunk/test.py	(original)
+++ wwwsearch/mechanize/trunk/test.py	Fri Nov  9 23:22:41 2007
@@ -87,10 +87,19 @@
                 #os.path.join("test", "test_scratch.doctest"),
                 globs=globs,
                 )
+        try:
+            import robotparser
+        except ImportError:
+            pass
+        else:
+            doctest.testfile(os.path.join("test",
+                                          "test_robotfileparser.doctest"))
 
         # run .doctest files
         special_doctests = [pm_doctest_filename,
                             os.path.join("test", "test_scratch.doctest"),
+                            os.path.join("test",
+                                         "test_robotfileparser.doctest"),
                             ]
         doctest_files = glob.glob(os.path.join("test", "*.doctest"))
 
@@ -102,13 +111,14 @@
 
         # run doctests in docstrings
         from mechanize import _headersutil, _auth, _clientcookie, _pullparser, \
-             _http, _rfc3986
+             _http, _rfc3986, _useragent
         doctest.testmod(_headersutil)
         doctest.testmod(_rfc3986)
         doctest.testmod(_auth)
         doctest.testmod(_clientcookie)
         doctest.testmod(_pullparser)
         doctest.testmod(_http)
+        doctest.testmod(_useragent)
 
     if run_unittests:
         # run vanilla unittest tests

Modified: wwwsearch/mechanize/trunk/test/test_html.doctest
==============================================================================
--- wwwsearch/mechanize/trunk/test/test_html.doctest	(original)
+++ wwwsearch/mechanize/trunk/test/test_html.doctest	Fri Nov  9 23:22:41 2007
@@ -251,3 +251,12 @@
 'Ti<script type="text/strange">alert("this is valid HTML -- yuck!")</script> tle &&'
 >>> get_title_sgmllib(html)
 'Ti<script type="text/strange">alert("this is valid HTML -- yuck!")</script> tle &&'
+
+
+No more tags after <title> used to cause an exception
+
+>>> html = ("""\
+... <html><head>
+... <title>""")
+>>> get_title_sgmllib(html)
+''

Added: wwwsearch/mechanize/trunk/test/test_robotfileparser.doctest
==============================================================================
--- (empty file)
+++ wwwsearch/mechanize/trunk/test/test_robotfileparser.doctest	Fri Nov  9 23:22:41 2007
@@ -0,0 +1,8 @@
+>>> from mechanize._http import MechanizeRobotFileParser
+
+Calling .set_opener() without args sets a default opener.
+
+>>> rfp = MechanizeRobotFileParser()
+>>> rfp.set_opener()
+>>> rfp._opener  # doctest: +ELLIPSIS
+<mechanize._opener.OpenerDirector instance at ...>


More information about the wwwsearch-commits mailing list