From jjlee at codespeak.net Wed Jul 5 22:18:03 2006 From: jjlee at codespeak.net (jjlee at codespeak.net) Date: Wed, 5 Jul 2006 22:18:03 +0200 (CEST) Subject: [wwwsearch-commits] r29654 - wwwsearch/mechanize/trunk/mechanize Message-ID: <20060705201803.27FD1100A7@code0.codespeak.net> Author: jjlee Date: Wed Jul 5 22:18:02 2006 New Revision: 29654 Added: wwwsearch/mechanize/trunk/mechanize/_seek.py Log: Add missing module _seek.py Added: wwwsearch/mechanize/trunk/mechanize/_seek.py ============================================================================== --- (empty file) +++ wwwsearch/mechanize/trunk/mechanize/_seek.py Wed Jul 5 22:18:02 2006 @@ -0,0 +1,11 @@ +from urllib2 import BaseHandler +from _response import response_seek_wrapper + + +class SeekableProcessor(BaseHandler): + """Make responses seekable.""" + + def any_response(self, request, response): + if not hasattr(response, "seek"): + return response_seek_wrapper(response) + return response From jjlee at codespeak.net Wed Jul 5 22:18:32 2006 From: jjlee at codespeak.net (jjlee at codespeak.net) Date: Wed, 5 Jul 2006 22:18:32 +0200 (CEST) Subject: [wwwsearch-commits] r29655 - wwwsearch/mechanize/trunk Message-ID: <20060705201832.420C8100AC@code0.codespeak.net> Author: jjlee Date: Wed Jul 5 22:18:31 2006 New Revision: 29655 Modified: wwwsearch/mechanize/trunk/0.1-changes.txt Log: Explain how to get a sequence from iterable return values Modified: wwwsearch/mechanize/trunk/0.1-changes.txt ============================================================================== --- wwwsearch/mechanize/trunk/0.1-changes.txt (original) +++ wwwsearch/mechanize/trunk/0.1-changes.txt Wed Jul 5 22:18:31 2006 @@ -31,7 +31,7 @@ - .forms() and .links() now both return iterators (in fact, generators), not sequences (not really an interface change: these were always documented to return iterables, but it will no doubt break some client - code). + code). Use e.g. list(browser.forms()) if you want a list. 
- .links no longer raises LinkNotFoundError (was accidental -- only .click_link() / .find_link() should raise this). From jjlee at codespeak.net Wed Jul 5 22:54:42 2006 From: jjlee at codespeak.net (jjlee at codespeak.net) Date: Wed, 5 Jul 2006 22:54:42 +0200 (CEST) Subject: [wwwsearch-commits] r29657 - in wwwsearch/mechanize/trunk: mechanize test Message-ID: <20060705205442.6BE44100AF@code0.codespeak.net> Author: jjlee Date: Wed Jul 5 22:54:41 2006 New Revision: 29657 Modified: wwwsearch/mechanize/trunk/mechanize/_http.py wwwsearch/mechanize/trunk/test/test_urllib2.py Log: Fix UnboundLocalError for Refresh with URL but no '=' (titus) Modified: wwwsearch/mechanize/trunk/mechanize/_http.py ============================================================================== --- wwwsearch/mechanize/trunk/mechanize/_http.py (original) +++ wwwsearch/mechanize/trunk/mechanize/_http.py Wed Jul 5 22:54:41 2006 @@ -433,9 +433,10 @@ if ii != -1: pause, newurl_spec = float(refresh[:ii]), refresh[ii+1:] jj = newurl_spec.find("=") + key = None if jj != -1: key, newurl = newurl_spec[:jj], newurl_spec[jj+1:] - if key.strip().lower() != "url": + if key is None or key.strip().lower() != "url": debug("bad Refresh header: %r" % refresh) return response else: Modified: wwwsearch/mechanize/trunk/test/test_urllib2.py ============================================================================== --- wwwsearch/mechanize/trunk/test/test_urllib2.py (original) +++ wwwsearch/mechanize/trunk/test/test_urllib2.py Wed Jul 5 22:54:41 2006 @@ -778,14 +778,20 @@ # XXX test processor constructor optional args h = HTTPRefreshProcessor(max_time=None, honor_time=False) - for val in ['0; url="http://example.com/foo/"', "2"]: + for val, valid in [ + ('0; url="http://example.com/foo/"', True), + ("2", True), + # in the past, this failed with UnboundLocalError + ('0; "http://example.com/foo/"', False), + ]: o = h.parent = MockOpener() req = Request("http://example.com/") headers = MockHeaders({"refresh": val}) r 
= MockResponse(200, "OK", headers, "") newr = h.http_response(req, r) - self.assertEqual(o.proto, "http") - self.assertEqual(o.args, (req, r, "refresh", "OK", headers)) + if valid: + self.assertEqual(o.proto, "http") + self.assertEqual(o.args, (req, r, "refresh", "OK", headers)) def test_redirect(self): from_url = "http://example.com/a.html" From jjlee at codespeak.net Sun Jul 9 14:35:54 2006 From: jjlee at codespeak.net (jjlee at codespeak.net) Date: Sun, 9 Jul 2006 14:35:54 +0200 (CEST) Subject: [wwwsearch-commits] r29894 - wwwsearch/mechanize/trunk Message-ID: <20060709123554.8C1FA1007B@code0.codespeak.net> Author: jjlee Date: Sun Jul 9 14:35:52 2006 New Revision: 29894 Modified: wwwsearch/mechanize/trunk/README.html.in Log: HTML fixes Modified: wwwsearch/mechanize/trunk/README.html.in ============================================================================== --- wwwsearch/mechanize/trunk/README.html.in (original) +++ wwwsearch/mechanize/trunk/README.html.in Sun Jul 9 14:35:52 2006 @@ -171,7 +171,7 @@ """)} -so anything you would normally import from urllib2 can +

so anything you would normally import from urllib2 can (and should, by preference, to insulate you from future changes) be imported from mechanize instead. In many cases if you import an object from mechanize it will be the very same object you would get if @@ -333,7 +333,8 @@

  • Make EncodingFinder public, I guess (but probably improve it first). (For example: support Mark Pilgrim's universal encoding detector?) -
  • Add two-way links between BeautifulSoup & ClientForm object models. +
  • Add two-way links between BeautifulSoup & ClientForm object + models.
  • In 0.2: switch to Python unicode strings everywhere appropriate (HTTP level should still use byte strings, of course).
  • clean_url(): test browser behaviour. I think From jjlee at codespeak.net Sun Jul 9 14:37:52 2006 From: jjlee at codespeak.net (jjlee at codespeak.net) Date: Sun, 9 Jul 2006 14:37:52 +0200 (CEST) Subject: [wwwsearch-commits] r29895 - wwwsearch/mechanize/trunk/mechanize Message-ID: <20060709123752.1AE361007B@code0.codespeak.net> Author: jjlee Date: Sun Jul 9 14:37:50 2006 New Revision: 29895 Modified: wwwsearch/mechanize/trunk/mechanize/_html.py Log: Simplify CachingGeneratorFunction implementation a bit Modified: wwwsearch/mechanize/trunk/mechanize/_html.py ============================================================================== --- wwwsearch/mechanize/trunk/mechanize/_html.py (original) +++ wwwsearch/mechanize/trunk/mechanize/_html.py Sun Jul 9 14:37:50 2006 @@ -40,19 +40,15 @@ def __init__(self, iterable): self._cache = [] - # wrap iterable to make it non-restartable (otherwise, repeated # __call__ would give incorrect results) - def make_gen(): - for item in iterable: - yield item - self._generator = make_gen() + self._iterator = iter(iterable) def __call__(self): cache = self._cache for item in cache: yield item - for item in self._generator: + for item in self._iterator: cache.append(item) yield item From jjlee at codespeak.net Sun Jul 9 15:35:44 2006 From: jjlee at codespeak.net (jjlee at codespeak.net) Date: Sun, 9 Jul 2006 15:35:44 +0200 (CEST) Subject: [wwwsearch-commits] r29900 - wwwsearch/mechanize/trunk/mechanize Message-ID: <20060709133544.D93BD1007E@code0.codespeak.net> Author: jjlee Date: Sun Jul 9 15:35:44 2006 New Revision: 29900 Modified: wwwsearch/mechanize/trunk/mechanize/_html.py Log: Fix RobustTitleFactory (titus) Modified: wwwsearch/mechanize/trunk/mechanize/_html.py ============================================================================== --- wwwsearch/mechanize/trunk/mechanize/_html.py (original) +++ wwwsearch/mechanize/trunk/mechanize/_html.py Sun Jul 9 15:35:44 2006 @@ -413,7 +413,7 @@ self._bs = soup self._encoding = 
encoding - def title(soup): + def title(self): import BeautifulSoup title = self._bs.first("title") if title == BeautifulSoup.Null: From jjlee at codespeak.net Wed Jul 12 00:04:18 2006 From: jjlee at codespeak.net (jjlee at codespeak.net) Date: Wed, 12 Jul 2006 00:04:18 +0200 (CEST) Subject: [wwwsearch-commits] r29964 - wwwsearch/mechanize/trunk Message-ID: <20060711220418.499B21007E@code0.codespeak.net> Author: jjlee Date: Wed Jul 12 00:04:17 2006 New Revision: 29964 Modified: wwwsearch/mechanize/trunk/README.html.in wwwsearch/mechanize/trunk/setup.py Log: Declare a setuptools dependency on BeautifulSoup version 2 Modified: wwwsearch/mechanize/trunk/README.html.in ============================================================================== --- wwwsearch/mechanize/trunk/README.html.in (original) +++ wwwsearch/mechanize/trunk/README.html.in Wed Jul 12 00:04:17 2006 @@ -296,8 +296,6 @@ integrate docstring and non-docstring docs.
  • Note BeautifulSoup 3.0 doesn't work yet. -
  • Add an "extra" to setup.py for BeautifulSoup (i.e. a declared - feature depending on BeautifulSoup)?
  • Document use of BeautifulSoup (RobustFactory).
  • Document means of processing response on ad-hoc basis with .set_response() - e.g. to fix bad encoding in Content-type header or @@ -571,7 +569,14 @@
  • Which version of Python do I need?

    2.3 or above.

  • What else do I need? -

    mechanize depends on ClientForm. +

    mechanize depends on ClientForm. The + setup.py script also declares a dependency on BeautifulSoup, + but there is no true dependency: the declaration is there only to avoid + confusing people who don't realise that mechanize is not compatible with + BeautifulSoup version 3 -- only BeautifulSoup version 2 is currently + supported. A future version of mechanize will support BeautifulSoup + version 3.

    The versions of those required modules are listed in the setup.py for mechanize (included with the download). The dependencies are automatically fetched by =0.2.2, ==dev"] +INSTALL_REQUIRES = [ + "ClientForm>=0.2.2, ==dev", + # there's no dependency on BeautifulSoup, but people get confused when + # BeautifulSoup 3 doesn't work + "BeautifulSoup>=2.1.1, <3.0", + ] NAME = "mechanize" PACKAGE = True LICENSE = "BSD" # or ZPL 2.1 From jjlee at codespeak.net Sat Jul 29 17:02:21 2006 From: jjlee at codespeak.net (jjlee at codespeak.net) Date: Sat, 29 Jul 2006 17:02:21 +0200 (CEST) Subject: [wwwsearch-commits] r30731 - in wwwsearch/mechanize/trunk: . mechanize Message-ID: <20060729150221.9D18D1007B@code0.codespeak.net> Author: jjlee Date: Sat Jul 29 17:02:19 2006 New Revision: 30731 Modified: wwwsearch/mechanize/trunk/mechanize/_http.py wwwsearch/mechanize/trunk/test.py Log: Fix bug with quoted META Refresh URL (nilton.volpato at gmail.com); Clean up test.py a bit Modified: wwwsearch/mechanize/trunk/mechanize/_http.py ============================================================================== --- wwwsearch/mechanize/trunk/mechanize/_http.py (original) +++ wwwsearch/mechanize/trunk/mechanize/_http.py Sat Jul 29 17:02:19 2006 @@ -400,6 +400,42 @@ https_request = http_request https_response = http_response + +def clean_refresh_url(url): + # e.g. 
Firefox 1.5 does (something like) this + if ((url.startswith('"') and url.endswith('"')) or + (url.startswith("'") and url.endswith("'"))): + return url[1:-1] + return url + +def parse_refresh_header(refresh): + """ + >>> parse_refresh_header("1; url=http://example.com/") + (1.0, 'http://example.com/') + >>> parse_refresh_header("1; url='http://example.com/'") + (1.0, 'http://example.com/') + >>> parse_refresh_header("1") + (1.0, None) + >>> parse_refresh_header("blah") + Traceback (most recent call last): + ValueError: invalid literal for float(): blah + + """ + + ii = refresh.find(";") + if ii != -1: + pause, newurl_spec = float(refresh[:ii]), refresh[ii+1:] + jj = newurl_spec.find("=") + key = None + if jj != -1: + key, newurl = newurl_spec[:jj], newurl_spec[jj+1:] + newurl = clean_refresh_url(newurl) + if key is None or key.strip().lower() != "url": + raise ValueError() + else: + pause, newurl = float(refresh), None + return pause, newurl + class HTTPRefreshProcessor(BaseHandler): """Perform HTTP Refresh redirections. 
@@ -429,18 +465,13 @@ if code == 200 and hdrs.has_key("refresh"): refresh = hdrs.getheaders("refresh")[0] - ii = refresh.find(";") - if ii != -1: - pause, newurl_spec = float(refresh[:ii]), refresh[ii+1:] - jj = newurl_spec.find("=") - key = None - if jj != -1: - key, newurl = newurl_spec[:jj], newurl_spec[jj+1:] - if key is None or key.strip().lower() != "url": - debug("bad Refresh header: %r" % refresh) - return response - else: - pause, newurl = float(refresh), response.geturl() + try: + pause, newurl = parse_refresh_header(refresh) + except ValueError: + debug("bad Refresh header: %r" % refresh) + return response + if newurl is None: + newurl = response.geturl() if (self.max_time is None) or (pause <= self.max_time): if pause > 1E-3 and self.honor_time: time.sleep(pause) Modified: wwwsearch/mechanize/trunk/test.py ============================================================================== --- wwwsearch/mechanize/trunk/test.py (original) +++ wwwsearch/mechanize/trunk/test.py Sat Jul 29 17:02:19 2006 @@ -14,7 +14,7 @@ "test_headers", "test_urllib2", "test_pullparser", ] -import sys, os, traceback, logging +import sys, os, traceback, logging, glob from unittest import defaultTestLoader, TextTestRunner, TestSuite, TestCase level = logging.DEBUG @@ -108,6 +108,8 @@ ## __builtin__.jjl = jjl # XXX temporary stop-gap to run doctests + + # import local copy of Python 2.5 doctest assert os.path.isdir("test") sys.path.insert(0, "test") # needed for recent doctest / linecache -- this is only for testing @@ -118,29 +120,43 @@ # that renamed module. 
sys.path.insert(0, "test-tools") import doctest + import mechanize + + # run .doctest files needing special support common_globs = {"mechanize": mechanize} + pm_doctest_filename = os.path.join("test", "test_password_manager.doctest") for globs in [ {"mgr_class": mechanize.HTTPPasswordMgr}, {"mgr_class": mechanize.HTTPProxyPasswordMgr}, ]: globs.update(common_globs) doctest.testfile( - os.path.join("test", "test_password_manager.doctest"), + pm_doctest_filename, #os.path.join("test", "test_scratch.doctest"), globs=globs, ) - - doctest.testfile(os.path.join("test", "test_rfc3986.doctest")) - doctest.testfile(os.path.join("test", "test_request.doctest")) - doctest.testfile(os.path.join("test", "test_history.doctest")) - doctest.testfile(os.path.join("test", "test_html.doctest")) - from mechanize import _headersutil, _auth, _clientcookie, _pullparser + + # run .doctest files + special_doctests = [pm_doctest_filename, + os.path.join("test", "test_scratch.doctest"), + ] + doctest_files = glob.glob(os.path.join("test", "*.doctest")) + for dt in special_doctests: + doctest_files.remove(dt) + for df in doctest_files: + doctest.testfile(df) + + # run doctests in docstrings + from mechanize import _headersutil, _auth, _clientcookie, _pullparser, \ + _http doctest.testmod(_headersutil) doctest.testmod(_auth) doctest.testmod(_clientcookie) doctest.testmod(_pullparser) + doctest.testmod(_http) + # run vanilla unittest tests import unittest test_path = os.path.join(os.path.dirname(sys.argv[0]), "test") sys.path.insert(0, test_path) From jjlee at codespeak.net Thu Aug 10 00:01:04 2006 From: jjlee at codespeak.net (jjlee at codespeak.net) Date: Thu, 10 Aug 2006 00:01:04 +0200 (CEST) Subject: [wwwsearch-commits] r31210 - in wwwsearch/mechanize/trunk: docs mechanize test Message-ID: <20060809220104.8BC9310060@code0.codespeak.net> Author: jjlee Date: Thu Aug 10 00:01:03 2006 New Revision: 31210 Added: wwwsearch/mechanize/trunk/docs/ Modified: 
wwwsearch/mechanize/trunk/mechanize/_http.py wwwsearch/mechanize/trunk/test/test_urllib2.py Log: Fix HTTP-EQUIV with no content attribute case (Pratik Dam ); Fix assumption that httplib.HTTPMessage treats dict-style __setitem__ as append rather than set (where on earth did I get that from?) Modified: wwwsearch/mechanize/trunk/mechanize/_http.py ============================================================================== --- wwwsearch/mechanize/trunk/mechanize/_http.py (original) +++ wwwsearch/mechanize/trunk/mechanize/_http.py Thu Aug 10 00:01:03 2006 @@ -148,7 +148,7 @@ http_equiv = self.unescape_attr_if_required(value) elif key == "content": content = self.unescape_attr_if_required(value) - if http_equiv is not None: + if http_equiv is not None and content is not None: self.http_equiv.append((http_equiv, content)) def end_head(self): @@ -280,9 +280,9 @@ def http_response(self, request, response): if not hasattr(response, "seek"): response = response_seek_wrapper(response) - headers = response.info() + http_message = response.info() url = response.geturl() - ct_hdrs = response.info().getheaders("content-type") + ct_hdrs = http_message.getheaders("content-type") if is_html(ct_hdrs, url, self._allow_xhtml): try: try: @@ -294,8 +294,11 @@ pass else: for hdr, val in html_headers: - # rfc822.Message interprets this as appending, not clobbering - headers[hdr] = val + # add a header + http_message.dict[hdr.lower()] = val + text = hdr + ": " + val + for line in text.split("\n"): + http_message.headers.append(line + "\n") return response https_response = http_response Modified: wwwsearch/mechanize/trunk/test/test_urllib2.py ============================================================================== --- wwwsearch/mechanize/trunk/test/test_urllib2.py (original) +++ wwwsearch/mechanize/trunk/test/test_urllib2.py Thu Aug 10 00:01:03 2006 @@ -9,7 +9,7 @@ # ProxyHandler, CustomProxy, CustomProxyHandler (I don't use a proxy) # GopherHandler (haven't used gopher for a decade 
or so...) -import unittest, StringIO, os, sys, UserDict +import unittest, StringIO, os, sys, UserDict, httplib import mechanize @@ -36,10 +36,18 @@ def readline(self, count=None): pass def close(self): pass -class MockHeaders(dict): - def getheaders(self, name): - name = name.lower() - return [v for k, v in self.iteritems() if name == k.lower()] +def http_message(mapping): + """ + >>> http_message({"Content-Type": "text/html"}).items() + [('content-type', 'text/html')] + + """ + f = [] + for kv in mapping.items(): + f.append("%s: %s" % kv) + f.append("") + msg = httplib.HTTPMessage(StringIO.StringIO("\r\n".join(f))) + return msg class MockResponse(StringIO.StringIO): def __init__(self, code, msg, headers, data, url=None): @@ -764,15 +772,19 @@ req = Request("http://example.com/") r = MockResponse( 200, "OK", - MockHeaders({"Foo": "Bar", "Content-type": "text/html"}), + http_message({"Foo": "Bar", + "Content-type": "text/html", + "Refresh": "blah"}), '' '' - '' + '', + "http://example.com/" ) newr = h.http_response(req, r) headers = newr.info() - self.assert_(headers["Refresh"] == "spam&eggs") self.assert_(headers["Foo"] == "Bar") + self.assert_(headers["Refresh"] == "spam&eggs") + self.assert_(headers.getheaders("Refresh") == ["blah", "spam&eggs"]) def test_refresh(self): # XXX test processor constructor optional args @@ -786,8 +798,8 @@ ]: o = h.parent = MockOpener() req = Request("http://example.com/") - headers = MockHeaders({"refresh": val}) - r = MockResponse(200, "OK", headers, "") + headers = http_message({"refresh": val}) + r = MockResponse(200, "OK", headers, "", "http://example.com/") newr = h.http_response(req, r) if valid: self.assertEqual(o.proto, "http") @@ -809,7 +821,7 @@ req.origin_req_host = "example.com" # XXX try: method(req, MockFile(), code, "Blah", - MockHeaders({"location": to_url})) + http_message({"location": to_url})) except mechanize.HTTPError: # 307 in response to POST requires user OK self.assert_(code == 307 and data is not None) @@ 
-825,7 +837,7 @@ # loop detection def redirect(h, req, url=to_url): h.http_error_302(req, MockFile(), 302, "Blah", - MockHeaders({"location": url})) + http_message({"location": url})) # Note that the *original* request shares the same record of # redirections with the sub-requests caused by the redirections. @@ -1025,7 +1037,10 @@ """, - [("refresh", "1; http://example.com/"), ("foo", "bar")]) + [("refresh", "1; http://example.com/"), ("foo", "bar")]), + (""" + """, + []) ] for html, result in htmls: self.assertEqual(parse_head(StringIO.StringIO(html), HeadParser()), result) @@ -1114,4 +1129,6 @@ if __name__ == "__main__": + import doctest + doctest.testmod() unittest.main() From jjlee at codespeak.net Thu Aug 10 00:03:29 2006 From: jjlee at codespeak.net (jjlee at codespeak.net) Date: Thu, 10 Aug 2006 00:03:29 +0200 (CEST) Subject: [wwwsearch-commits] r31211 - wwwsearch/mechanize/trunk/docs Message-ID: <20060809220329.80E6910053@code0.codespeak.net> Author: jjlee Date: Thu Aug 10 00:03:28 2006 New Revision: 31211 Removed: wwwsearch/mechanize/trunk/docs/ Log: Remove accidental add of new directory docs From jjlee at codespeak.net Thu Aug 10 21:54:59 2006 From: jjlee at codespeak.net (jjlee at codespeak.net) Date: Thu, 10 Aug 2006 21:54:59 +0200 (CEST) Subject: [wwwsearch-commits] r31244 - in wwwsearch/mechanize/trunk: . 
mechanize test Message-ID: <20060810195459.B483A10063@code0.codespeak.net> Author: jjlee Date: Thu Aug 10 21:54:58 2006 New Revision: 31244 Modified: wwwsearch/mechanize/trunk/functional_tests.py wwwsearch/mechanize/trunk/mechanize/_http.py wwwsearch/mechanize/trunk/test/test_urllib2.py Log: Use mechanize to open robots.txt; Don't consult RobotFileParser instance about non-HTTP URLs Modified: wwwsearch/mechanize/trunk/functional_tests.py ============================================================================== --- wwwsearch/mechanize/trunk/functional_tests.py (original) +++ wwwsearch/mechanize/trunk/functional_tests.py Thu Aug 10 21:54:58 2006 @@ -18,9 +18,10 @@ #from mechanize import CreateBSDDBCookieJar -## logger = logging.getLogger("mechanize") -## logger.addHandler(logging.StreamHandler()) -## logger.setLevel(logging.DEBUG) +import logging +logger = logging.getLogger("mechanize") +logger.addHandler(logging.StreamHandler()) +logger.setLevel(logging.DEBUG) def sanepathname2url(path): @@ -184,6 +185,16 @@ o.close() install_opener(None) + def test_robots(self): + plain_opener = mechanize.build_opener(mechanize.HTTPRobotRulesProcessor) + browser = mechanize.Browser() + for opener in plain_opener, browser: + r = opener.open("http://wwwsearch.sourceforge.net/robots") + self.assertEqual(r.code, 200) + self.assertRaises( + mechanize.RobotExclusionError, + opener.open, "http://wwwsearch.sourceforge.net/norobots") + def test_urlretrieve(self): url = "http://www.python.org/" verif = CallbackVerifier(self) Modified: wwwsearch/mechanize/trunk/mechanize/_http.py ============================================================================== --- wwwsearch/mechanize/trunk/mechanize/_http.py (original) +++ wwwsearch/mechanize/trunk/mechanize/_http.py Thu Aug 10 21:54:58 2006 @@ -332,6 +332,46 @@ except ImportError: pass else: + class MechanizeRobotFileParser(robotparser.RobotFileParser): + + def __init__(self, url='', opener=None): + import _opener + 
robotparser.RobotFileParser.__init__(self, url) + self._opener = opener + + def set_opener(self, opener=None): + if opener is None: + opener = _opener.OpenerDirector() + self._opener = opener + + def read(self): + """Reads the robots.txt URL and feeds it to the parser.""" + if self._opener is None: + self.set_opener() + try: + f = self._opener.open(self.url) + except HTTPError, f: + pass + except (IOError, socket.error, OSError), exc: + robotparser._debug("ignoring error opening %r: %s" % + (self.url, exc)) + return + lines = [] + line = f.readline() + while line: + lines.append(line.strip()) + line = f.readline() + status = f.code + if status == 401 or status == 403: + self.disallow_all = True + robotparser._debug("disallow all") + elif status >= 400: + self.allow_all = True + robotparser._debug("allow all") + elif status == 200 and lines: + robotparser._debug("parse lines") + self.parse(lines) + class RobotExclusionError(urllib2.HTTPError): def __init__(self, request, *args): apply(urllib2.HTTPError.__init__, (self,)+args) @@ -349,16 +389,29 @@ else: http_response_class = HTTPMessage - def __init__(self, rfp_class=robotparser.RobotFileParser): + def __init__(self, rfp_class=MechanizeRobotFileParser): self.rfp_class = rfp_class self.rfp = None self._host = None def http_request(self, request): - host = request.get_host() scheme = request.get_type() + if scheme not in ["http", "https"]: + # robots exclusion only applies to HTTP + return request + + if request.get_selector() == "/robots.txt": + # /robots.txt is always OK to fetch + return request + + host = request.get_host() if host != self._host: self.rfp = self.rfp_class() + try: + self.rfp.set_opener(self.parent) + except AttributeError: + debug("%r instance does not support set_opener" % + self.rfp.__class__) self.rfp.set_url(scheme+"://"+host+"/robots.txt") self.rfp.read() self._host = host Modified: wwwsearch/mechanize/trunk/test/test_urllib2.py 
============================================================================== --- wwwsearch/mechanize/trunk/test/test_urllib2.py (original) +++ wwwsearch/mechanize/trunk/test/test_urllib2.py Thu Aug 10 21:54:58 2006 @@ -394,6 +394,8 @@ return self def set_url(self, url): self.calls.append(("set_url", url)) + def set_opener(self, opener): + self.calls.append(("set_opener", opener)) def read(self): self.calls.append("read") def can_fetch(self, ua, url): @@ -669,8 +671,10 @@ return # skip test else: from mechanize import HTTPRobotRulesProcessor + opener = OpenerDirector() rfpc = MockRobotFileParserClass() h = HTTPRobotRulesProcessor(rfpc) + opener.add_handler(h) url = "http://example.com:80/foo/bar.html" req = Request(url) @@ -679,6 +683,7 @@ h.http_request(req) self.assert_(rfpc.calls == [ "__call__", + ("set_opener", opener), ("set_url", "http://example.com:80/robots.txt"), "read", ("can_fetch", "", url), @@ -718,6 +723,7 @@ h.http_request(req) self.assert_(rfpc.calls == [ "__call__", + ("set_opener", opener), ("set_url", "http://example.com/robots.txt"), "read", ("can_fetch", "", url), @@ -729,10 +735,17 @@ h.http_request(req) self.assert_(rfpc.calls == [ "__call__", + ("set_opener", opener), ("set_url", "https://example.org/robots.txt"), "read", ("can_fetch", "", url), ]) + # non-HTTP URL -> ignore robots.txt + rfpc.clear() + url = "ftp://example.com/" + req = Request(url) + h.http_request(req) + self.assert_(rfpc.calls == []) def test_cookies(self): cj = MockCookieJar() From jjlee at codespeak.net Thu Aug 10 22:11:45 2006 From: jjlee at codespeak.net (jjlee at codespeak.net) Date: Thu, 10 Aug 2006 22:11:45 +0200 (CEST) Subject: [wwwsearch-commits] r31245 - wwwsearch/mechanize/trunk/test Message-ID: <20060810201145.16F7010063@code0.codespeak.net> Author: jjlee Date: Thu Aug 10 22:11:44 2006 New Revision: 31245 Removed: wwwsearch/mechanize/trunk/test/test_conncache.py Log: Remove unused test file Deleted: /wwwsearch/mechanize/trunk/test/test_conncache.py 
============================================================================== --- /wwwsearch/mechanize/trunk/test/test_conncache.py Thu Aug 10 22:11:44 2006 +++ (empty file) @@ -1,14 +0,0 @@ -"""Tests for mechanize._ConnCache module.""" - -import unittest, sys - -class ConnCacheTests(unittest.TestCase): - - def test_ConnectionCache(self): - from mechanize import ConnectionCache - ConnectionCache() - - -if __name__ == "__main__": - #unittest.main() - pass From jjlee at codespeak.net Sat Aug 19 04:24:16 2006 From: jjlee at codespeak.net (jjlee at codespeak.net) Date: Sat, 19 Aug 2006 04:24:16 +0200 (CEST) Subject: [wwwsearch-commits] r31398 - in wwwsearch/mechanize/trunk: mechanize test Message-ID: <20060819022416.4CD2310068@code0.codespeak.net> Author: jjlee Date: Sat Aug 19 04:24:14 2006 New Revision: 31398 Modified: wwwsearch/mechanize/trunk/mechanize/_html.py wwwsearch/mechanize/trunk/test/test_mechanize.py Log: Fix crash with </base> tag (yajdbgr02 at sneakemail.com) Modified: wwwsearch/mechanize/trunk/mechanize/_html.py ============================================================================== --- wwwsearch/mechanize/trunk/mechanize/_html.py (original) +++ wwwsearch/mechanize/trunk/mechanize/_html.py Sat Aug 19 04:24:14 2006 @@ -163,11 +163,11 @@ p = self.link_parser_class(response, encoding=encoding) for token in p.tags(*(self.urltags.keys()+["base"])): + if token.type == "endtag": + continue if token.data == "base": base_url = dict(token.attrs).get("href") continue - if token.type == "endtag": - continue attrs = dict(token.attrs) tag = token.data name = attrs.get("name") Modified: wwwsearch/mechanize/trunk/test/test_mechanize.py ============================================================================== --- wwwsearch/mechanize/trunk/test/test_mechanize.py (original) +++ wwwsearch/mechanize/trunk/test/test_mechanize.py Sat Aug 19 04:24:14 2006 @@ -15,6 +15,17 @@ FACTORY_CLASSES.append(mechanize.RobustFactory) +class RegressionTests(TestCase): + + def 
test_close_base_tag(self): + # any document containing a </base> tag used to cause an exception + br = mechanize.Browser() + response = mechanize.make_response( + "", [("Content-type", "text/html")], "", 200, "OK") + br.set_response(response) + list(br.links()) + + class CachingGeneratorFunctionTests(TestCase): def _get_simple_cgenf(self, log): From jjlee at codespeak.net Sat Sep 2 21:12:54 2006 From: jjlee at codespeak.net (jjlee at codespeak.net) Date: Sat, 2 Sep 2006 21:12:54 +0200 (CEST) Subject: [wwwsearch-commits] r31955 - wwwsearch/release_scripts Message-ID: <20060902191254.928591006E@code0.codespeak.net> Author: jjlee Date: Sat Sep 2 21:12:52 2006 New Revision: 31955 Modified: wwwsearch/release_scripts/mrelease.py wwwsearch/release_scripts/release.py Log: New version of mechanize release script that should have been committed ages ago when the last release happened; Move web page menu items around a bit to reflect mechanize / pullparser / ClientCookie merger Modified: wwwsearch/release_scripts/mrelease.py ============================================================================== --- wwwsearch/release_scripts/mrelease.py (original) +++ wwwsearch/release_scripts/mrelease.py Sat Sep 2 21:12:52 2006 @@ -1,68 +1,75 @@ #!/usr/bin/env python -""" -mrelease.py version [branch] - -version is the version string, eg. "0.0.1a" -branch is the svn branch, eg. 
"trunk", or "branch/stable" - -""" - -import sys, os -from release import clean, check_versions, win_version, check_date, \ - REPOSITORY_URL, maketag, getargs +import sys, os, posixpath +from release import REPOSITORY_URL, getargs NAME = "mechanize" -def main(): - tag, version, branch, revision, pretend = getargs(sys.argv) +def main(argv): + import release + tag, rel, upload, version, branch, revision, options = getargs(argv) + + release.display_log_messages() + + pretend = options.pretend + + if branch == "tagged": + last_tag = release.gettags(NAME)[-1] + project_svn_base = posixpath.join(REPOSITORY_URL, NAME) + branch = release.relative_location(project_svn_base, last_tag) + print "using tagged branch", branch + + build_dir = os.path.abspath("build") + www = release.create_svn_www_wc_instance( + build_dir, + pretend=pretend) + common = release.create_svn_common_wc_instance( + build_dir, NAME, + pretend=pretend) + src = release.create_svn_proj_wc_instance( + build_dir, NAME, branch, revision, + pretend=pretend) + + sdist = release.SDist(NAME, src.fs_path, version, pretend=pretend) + + sdist.test_pythons = [(2,5), (2,4), (2,3)] + sdist.add_svn_working_copies([www, common, src]) + def readmes_builder(fn): + def build_readmes(pretend): + readme_html = release.empy( + fn, pretend=pretend) + readme_html_with_base = release.empy( + fn, defines=["base=True"], pretend=pretend) + readme_txt = release.lynx_dump(readme_html_with_base, + pretend=pretend) + return [readme_html, readme_txt] + return build_readmes + def empy_builder(fn): + def empy_build(pretend): + return [release.empy(fn, pretend=pretend)] + return empy_build + sdist.add_file_builders([ + readmes_builder(src.filename("README.html.in")), + empy_builder(www.filename("GeneralFAQ.html.in")), + empy_builder(src.filename("doc.html.in")), + ]) + sdist.add_files([ + (common.filename("ChangeLog"), src.filename("ChangeLog.txt")), + ]) + sdist.add_versioned_files([ + "%s/_%s.py" % (NAME, NAME), "ChangeLog.txt", 
"README.html", "setup.py"]) + sdist.add_uploadable_files([ + (src.filename("ChangeLog.txt"), None), + (src.filename("GeneralFAQ.html"), None), + (src.filename("README.html"), ("README-%s.html" % version)), + ]) if tag: - maketag(version, REPOSITORY_URL, NAME, branch, revision, pretend) - sys.exit() - - os.system("svn co %s/%s/%s %s" % (REPOSITORY_URL, NAME, branch, NAME)) - - os.system("svn co %s/www" % REPOSITORY_URL) - os.system("svn co %s/%s/common" % (REPOSITORY_URL, NAME)) - os.system("em.py www/GeneralFAQ.html.in > www/GeneralFAQ.html") - os.system("cp www/GeneralFAQ.html common/ChangeLog %s" % NAME) - - release_dir = "%s-%s" % (NAME, version) - os.mkdir(release_dir) - os.chdir(NAME) - os.system("em.py README.html.in > README.html") - bad_files = check_versions( - ["%s/_%s.py" % (NAME, NAME), "ChangeLog", "README.html", "setup.py"], - version) - if bad_files: - sys.exit("version doesn't match in %s" % " ".join(bad_files)) - os.system("lynx -dump README.html > README.txt") - os.system("cp README.html ../%s/README-%s.html" % ( - release_dir, win_version(version))) - os.system("cp ChangeLog ../%s/ChangeLog.txt" % release_dir) - os.system("rm MANIFEST") - os.system("cp ../www/GeneralFAQ.html ../%s/" % release_dir) - os.system("python setup.py sdist --formats=gztar,zip") - os.chdir("dist") - import glob - distutils_zip = glob.glob("%s-%s*.zip" % (NAME, version))[0] - zip = "%s-%s.zip" % (NAME, win_version(version)) - distutils_tarball = glob.glob("%s-%s*.tar.gz" % (NAME, version))[0] - tarball = "%s-%s.tar.gz" % (NAME, version) - import shutil - shutil.move(distutils_zip, os.path.join('..','..', release_dir, zip)) - shutil.move(distutils_tarball, os.path.join('..','..', release_dir, tarball)) - os.chdir("..") - os.system("rm -rf dist") - os.system("rm -rf build") - os.chdir("..") - print "tarring:" - os.system("tar -cvf %s.tar %s" % (release_dir, release_dir)) - os.system("rm -rf %s" % release_dir) - - os.chdir(NAME) - for py in ("python2.4", "python2.3", 
"python2.2"): - os.system("%s test.py" % py) + sdist.tag(src, clean=options.clean) + if rel: + sdist.build(update=options.update, clean=options.clean) + if upload: + sdist.upload_to_sourceforge() -main() +if __name__ == "__main__": + main(sys.argv) Modified: wwwsearch/release_scripts/release.py ============================================================================== --- wwwsearch/release_scripts/release.py (original) +++ wwwsearch/release_scripts/release.py Sat Sep 2 21:12:52 2006 @@ -332,9 +332,14 @@ chdir(cwd, pretend) return out_fn -def empy(filename, pretend=False): +def empy(filename, defines=None, pretend=False): + def_text = "" + if defines: + def_text = " %s " % (" ".join(["-D%s" % define for define in defines])) + def cmd(fn): + return system("em.py %s%s > %s" % (filename, def_text, fn), pretend) out_fn = wrap_command( - lambda fn: system("em.py %s > %s" % (filename, fn), pretend), + cmd, os.path.dirname(filename), os.path.splitext(os.path.basename(filename))[0], pretend=pretend, @@ -530,13 +535,15 @@ Page("GeneralFAQ", title="General FAQs", url="../bits/GeneralFAQ.html"), Sep(), - Page("mechanize"), - Page("pullparser"), - Page("ClientCookie", + Page("mechanize", child=Page("ccdocs", - title="ClientCookie docs", url="../ClientCookie/doc.html")), + title="mechanize docs", url="../mechanize/doc.html")), Page("ClientForm"), Sep(), + Page("ClientCookie", + child=Page("ccdocs", + title="ClientCookie docs", url="../ClientCookie/doc.html")), + Page("pullparser"), Page("DOMForm"), Page("python-spidermonkey"), Page("ClientTable"), From jjlee at codespeak.net Sat Sep 2 21:16:45 2006 From: jjlee at codespeak.net (jjlee at codespeak.net) Date: Sat, 2 Sep 2006 21:16:45 +0200 (CEST) Subject: [wwwsearch-commits] r31956 - wwwsearch/release_scripts Message-ID: <20060902191645.218A51006E@code0.codespeak.net> Author: jjlee Date: Sat Sep 2 21:16:44 2006 New Revision: 31956 Modified: wwwsearch/release_scripts/colorize.py Log: Accept unicode strings as input (just str() 
them); Add unit test; Add a colorize_ex function that doesn't print; Don't use module string, where possible Modified: wwwsearch/release_scripts/colorize.py ============================================================================== --- wwwsearch/release_scripts/colorize.py (original) +++ wwwsearch/release_scripts/colorize.py Sat Sep 2 21:16:44 2006 @@ -37,22 +37,42 @@ def colorize(text): # for use with EmPy + print colorize_ex(text) + +def colorize_ex(text): from StringIO import StringIO + text = str(text) # we don't like unicode inp = StringIO(text) out = StringIO() Parser(inp.read(), out).format(None, None) - print out.getvalue() + return out.getvalue() class Parser: """ Send colored python source. + + >>> colorize('import blah\\n\\ndef foo():\\n blah()\\nprint "bye"\\n#comment\\n') +

    import blah
    +    
    +    def foo():
    +        blah()
    +    print "bye"
    +    #comment
    + + >>> colorize(u'import blah\\n\\ndef foo():\\n blah()\\nprint "bye"\\n#comment\\n') +
    import blah
    +    
    +    def foo():
    +        blah()
    +    print "bye"
    +    #comment
    + """ def __init__(self, raw, out = sys.stdout): """ Store the source text. """ - #self.raw = string.strip(string.expandtabs(raw)) - self.raw = string.rstrip(string.expandtabs(raw)) + self.raw = string.expandtabs(raw).rstrip() self.out = out def format(self, formatter, form): @@ -62,7 +82,7 @@ self.lines = [0, 0] pos = 0 while 1: - pos = string.find(self.raw, '\n', pos) + 1 + pos = self.raw.find('\n', pos) + 1 if not pos: break self.lines.append(pos) self.lines.append(len(self.raw)) @@ -125,7 +145,11 @@ self.out.write('') -def testmain(): +def test_main(): + import doctest + doctest.testmod() + +def demo(): import os, sys print "Formatting..." @@ -143,4 +167,4 @@ if __name__ == "__main__": - testmain() + test_main() From jjlee at codespeak.net Sat Sep 2 21:22:53 2006 From: jjlee at codespeak.net (jjlee at codespeak.net) Date: Sat, 2 Sep 2006 21:22:53 +0200 (CEST) Subject: [wwwsearch-commits] r31958 - wwwsearch/release_scripts Message-ID: <20060902192253.472E11006E@code0.codespeak.net> Author: jjlee Date: Sat Sep 2 21:22:52 2006 New Revision: 31958 Modified: wwwsearch/release_scripts/epydocsimplehtml.py Log: 22 Modified: wwwsearch/release_scripts/epydocsimplehtml.py ============================================================================== --- wwwsearch/release_scripts/epydocsimplehtml.py (original) +++ wwwsearch/release_scripts/epydocsimplehtml.py Sat Sep 2 21:22:52 2006 @@ -1,3 +1,5 @@ +# This is a hacked copy of epydoc's html.py, that writes simpler HTML + # # epydoc -- HTML output generator # Edward Loper @@ -29,6 +31,71 @@ from epydoc.util import plaintext_to_html, is_src_filename from epydoc.compat import * # Backwards compatibility +class MechanizeNameHelper: + + + # things docutils HTML formatter does that we want to get rid of: + + # (most of this is achieved via hacking of this file, copied from epydoc, + # but this class helps out with those hacks) + + # private module names in links to class doc pages + # private module names in breadcrumbs + # 
modules pages (apart from mechanize-module.html) + # in mechanize-module.html, remove "submodules" listing at the top + # frames.html (also need to remove link to frameset / non-frameset versions at top) + # index.html + # toc.html + # toc-everything.html + # toc-*.html + # non-mechanize source code + # module-tree.html (also need to hack bar at the top to s/Trees/Tree/) + # indices (also need to hack bar at the top to remove Indices link) + # help.html (also need to remove help link at the top) + # private module names in HTML file names + # private module names in class trees + + def __init__(self): + self._names = set() + add = self._names.add + import mechanize + private_modules = [m[:-3] for m in + os.listdir(os.path.dirname(mechanize.__file__)) if + m.startswith("_") and m.endswith(".py")] + for name in private_modules: + add(name) + + def want_source_code(self, cname): + return cname[0] == 'mechanize' + + def private_name(self, cname): + try: + module_name = cname[1] + except IndexError: + return False + else: + return module_name.startswith("_") + + def munge(self, label): + # Munge to remove private modules + if label.startswith("mechanize."): + label = label[len("mechanize."):] + + if not label.startswith("_"): + return label + + names = self._names + ii = label.find(".") + if ii < 0: + return label + + first_part = label[:ii] + if first_part not in names: + return label + + return 'mechanize'+label[ii:] +munger = MechanizeNameHelper() + ###################################################################### ## Template Compiler ###################################################################### @@ -391,8 +458,9 @@ be included in the index.""" # URL for 'trees' page - if self.module_list: self._trees_url = 'module-tree.html' - else: self._trees_url = 'class-tree.html' +## if self.module_list: self._trees_url = 'module-tree.html' +## else: self._trees_url = 'class-tree.html' + self._trees_url = 'class-tree.html' # Construct the value for 
self.indexed_docs. self.indexed_docs += [d for d in valdocs @@ -417,7 +485,8 @@ # reporting). self.modules_with_sourcecode = set() for doc in self.module_list: - if isinstance(doc, ModuleDoc) and is_src_filename(doc.filename): + if (isinstance(doc, ModuleDoc) and is_src_filename(doc.filename) and + munger.want_source_code(doc.canonical_name)): self.modules_with_sourcecode.add(doc) self._num_files = (len(self.class_list) + 2*len(self.module_list) + 11 + len(self.METADATA_INDICES)) @@ -526,75 +595,76 @@ for (name, label, label2) in self.METADATA_INDICES: indices[name] = self.build_metadata_index(name) - # Write the identifier index. If requested, split it into - # separate pages for each letter. - ident_by_letter = self._group_by_letter(indices['ident']) - if not self._split_ident_index: - self._write(self.write_link_index, directory, - 'identifier-index.html', indices, - 'Identifier Index', 'identifier-index.html', - ident_by_letter) - else: - # Write a page for each section. - for letter in self.LETTERS: - filename = 'identifier-index-%s.html' % letter - self._write(self.write_link_index, directory, filename, - indices, 'Identifier Index', filename, - ident_by_letter, [letter], - 'identifier-index-%s.html') - # Use the first non-empty section as the main index page. - for letter in self.LETTERS: - if letter in ident_by_letter: - filename = 'identifier-index.html' - self._write(self.write_link_index, directory, filename, - indices, 'Identifier Index', filename, - ident_by_letter, [letter], - 'identifier-index-%s.html') - break - - # Write the term index. - if indices['term']: - term_by_letter = self._group_by_letter(indices['term']) - self._write(self.write_link_index, directory, 'term-index.html', - indices, 'Term Definition Index', - 'term-index.html', term_by_letter) - else: - self._files_written += 1 # (skipped) - - # Write the metadata indices. 
- for (name, label, label2) in self.METADATA_INDICES: - if indices[name]: - self._write(self.write_metadata_index, directory, - '%s-index.html' % name, indices, name, - label, label2) - else: - self._files_written += 1 # (skipped) - - # Write the trees file (package & class hierarchies) - if self.module_list: - self._write(self.write_module_tree, directory, 'module-tree.html') - else: - self._files_written += 1 # (skipped) +## # Write the identifier index. If requested, split it into +## # separate pages for each letter. +## ident_by_letter = self._group_by_letter(indices['ident']) +## if not self._split_ident_index: +## self._write(self.write_link_index, directory, +## 'identifier-index.html', indices, +## 'Identifier Index', 'identifier-index.html', +## ident_by_letter) +## else: +## # Write a page for each section. +## for letter in self.LETTERS: +## filename = 'identifier-index-%s.html' % letter +## self._write(self.write_link_index, directory, filename, +## indices, 'Identifier Index', filename, +## ident_by_letter, [letter], +## 'identifier-index-%s.html') +## # Use the first non-empty section as the main index page. +## for letter in self.LETTERS: +## if letter in ident_by_letter: +## filename = 'identifier-index.html' +## self._write(self.write_link_index, directory, filename, +## indices, 'Identifier Index', filename, +## ident_by_letter, [letter], +## 'identifier-index-%s.html') +## break + +## # Write the term index. +## if indices['term']: +## term_by_letter = self._group_by_letter(indices['term']) +## self._write(self.write_link_index, directory, 'term-index.html', +## indices, 'Term Definition Index', +## 'term-index.html', term_by_letter) +## else: +## self._files_written += 1 # (skipped) + +## # Write the metadata indices. 
+## for (name, label, label2) in self.METADATA_INDICES: +## if indices[name]: +## self._write(self.write_metadata_index, directory, +## '%s-index.html' % name, indices, name, +## label, label2) +## else: +## self._files_written += 1 # (skipped) + +## # Write the trees file (package & class hierarchies) +## if self.module_list: +## self._write(self.write_module_tree, directory, 'module-tree.html') +## else: +## self._files_written += 1 # (skipped) if self.class_list: self._write(self.write_class_tree, directory, 'class-tree.html') else: self._files_written += 1 # (skipped) # Write the help file. - self._write(self.write_help, directory,'help.html') +## self._write(self.write_help, directory,'help.html') # Write the frames-based table of contents. - self._write(self.write_frames_index, directory, 'frames.html') - self._write(self.write_toc, directory, 'toc.html') - self._write(self.write_project_toc, directory, 'toc-everything.html') - for doc in self.module_list: - filename = 'toc-%s' % urllib.unquote(self.url(doc)) - self._write(self.write_module_toc, directory, filename, doc) +## self._write(self.write_frames_index, directory, 'frames.html') +## self._write(self.write_toc, directory, 'toc.html') +## self._write(self.write_project_toc, directory, 'toc-everything.html') +## for doc in self.module_list: +## filename = 'toc-%s' % urllib.unquote(self.url(doc)) +## self._write(self.write_module_toc, directory, filename, doc) # Write the object documentation. for doc in self.module_list: - filename = urllib.unquote(self.url(doc)) - self._write(self.write_module, directory, filename, doc) + if doc.is_package is True: + filename = urllib.unquote(self.url(doc)) + self._write(self.write_module, directory, filename, doc) for doc in self.class_list: filename = urllib.unquote(self.url(doc)) self._write(self.write_class, directory, filename, doc) @@ -620,9 +690,9 @@ # Write the index.html files. 
# (this must be done last, since it might copy another file) - self._files_written += 1 - log.progress(self._files_written/self._num_files, 'index.html') - self.write_homepage(directory) +## self._files_written += 1 +## log.progress(self._files_written/self._num_files, 'index.html') +## self.write_homepage(directory) # Report any failed crossreferences if self._failed_xrefs: @@ -705,7 +775,7 @@ self.write_standard_fields(out, doc) # If it's a package, then list the modules it contains. - if doc.is_package is True: + if 0:#doc.is_package is True: self.write_module_list(out, doc) # Write summary tables describing the variables that the @@ -927,7 +997,7 @@ self.write_header(out, title) self.write_navbar(out, 'trees') self.write_breadcrumbs(out, 'trees', url) - if self.class_list and self.module_list: + if 0:#self.class_list and self.module_list: out('
    \n') out(' [ Module Hierarchy\n') out(' | Class Hierarchy ]\n') @@ -1635,28 +1705,10 @@ >>> if context == "trees":    Trees    - >>> else: -    Trees    - >>> #endif - - - >>> if context == "indices": -    Indices    - >>> else: -    Indices    - >>> #endif - - - >>> if context == "help": -    Help    + >   Tree    >>> else:    Help    + href="$self._trees_url$">Tree    >>> #endif >>> if self._prj_link: @@ -1715,10 +1767,6 @@ >>> if self._show_private: $self.PRIVATE_LINK$ >>> #endif - [frames] | no frames] @@ -1744,7 +1792,9 @@ else: label = self._crumb(container) name = container.canonical_name - crumbs.insert(0, self.href(container, label)) # [xx] code=0?? + #if self.doc_kind(container) != 'Module': + if not munger.private_name(name): + crumbs.insert(0, self.href(container, label)) # [xx] code=0?? doc = container def _crumb(self, doc): @@ -2453,7 +2503,7 @@ else: s = '' for i in range(len(bases)-1, -1, -1): base = bases[i] - label = self.contextual_label(base, context) + label = munger.munge(self.contextual_label(base, context)) s = (' '*(width-4-len(label)) + self.href(base, label) +' --+'+postfix+'\n' + ' '*(width-4) + @@ -2925,11 +2975,11 @@ # Module: -module.html if isinstance(obj, ModuleDoc): if obj not in self.module_set: return None - return urllib.quote('%s'%obj.canonical_name) + '-module.html' + return urllib.quote(munger.munge('%s'%obj.canonical_name)) + '-module.html' # Class: -class.html elif isinstance(obj, ClassDoc): if obj not in self.class_set: return None - return urllib.quote('%s'%obj.canonical_name) + '-class.html' + return urllib.quote(munger.munge('%s'%obj.canonical_name)) + '-class.html' # Variable elif isinstance(obj, VariableDoc): val_doc = obj.value @@ -2991,7 +3041,7 @@ elif isinstance(api_doc, ModuleDoc): if api_doc in self.modules_with_sourcecode: return ('%s-pysrc.html' % - urllib.quote('%s' % api_doc.canonical_name)) + urllib.quote(munger.munge('%s' % api_doc.canonical_name))) else: return None else: @@ -3045,6 +3095,7 @@ if 
label.startswith('??'): label = 'unreachable' + label[2:] label = re.sub(r'-\d+$', '', label) + label = munger.munge(label) # Get the url for the target. url = self.url(target) From jjlee at codespeak.net Sat Sep 2 21:34:17 2006 From: jjlee at codespeak.net (jjlee at codespeak.net) Date: Sat, 2 Sep 2006 21:34:17 +0200 (CEST) Subject: [wwwsearch-commits] r31959 - wwwsearch/release_scripts Message-ID: <20060902193417.058AF1006E@code0.codespeak.net> Author: jjlee Date: Sat Sep 2 21:34:16 2006 New Revision: 31959 Modified: wwwsearch/release_scripts/epydocsimplehtml.py Log: Oops, committed (31958) before I typed the message again :-( Message should have been: Add hacks to epydoc API doc HTML writer to simplify output and hide private module names &c. (e.g. mechanize._mechanize module does not need its own documentation -- instead, just the stuff exported by the package should be documented). To do: write some CSS that gives output that's not so damn ugly (maybe get rid of the HTML tables too), integrate with rest of site and write static website build/promotion scripts... and update the docs ;-) (using epytext for API docs and ReST for web pages / other discursive documentation) Modified: wwwsearch/release_scripts/epydocsimplehtml.py ============================================================================== --- wwwsearch/release_scripts/epydocsimplehtml.py (original) +++ wwwsearch/release_scripts/epydocsimplehtml.py Sat Sep 2 21:34:16 2006 @@ -1,4 +1,4 @@ -# This is a hacked copy of epydoc's html.py, that writes simpler HTML +# This is a hacked copy of epydoc's html.py, that writes simpler HTML. 
# # epydoc -- HTML output generator From jjlee at codespeak.net Sat Sep 2 22:40:07 2006 From: jjlee at codespeak.net (jjlee at codespeak.net) Date: Sat, 2 Sep 2006 22:40:07 +0200 (CEST) Subject: [wwwsearch-commits] r31961 - wwwsearch/mechanize/trunk/mechanize Message-ID: <20060902204007.8163D1006E@code0.codespeak.net> Author: jjlee Date: Sat Sep 2 22:40:05 2006 New Revision: 31961 Modified: wwwsearch/mechanize/trunk/mechanize/__init__.py Log: Add an __all__ to mechanize's __init__.py Modified: wwwsearch/mechanize/trunk/mechanize/__init__.py ============================================================================== --- wwwsearch/mechanize/trunk/mechanize/__init__.py (original) +++ wwwsearch/mechanize/trunk/mechanize/__init__.py Sat Sep 2 22:40:05 2006 @@ -1,3 +1,82 @@ +__all__ = [ + 'AbstractBasicAuthHandler', + 'AbstractDigestAuthHandler', + 'BaseHandler', + 'Browser', + 'BrowserStateError', + 'CacheFTPHandler', + 'Cookie', + 'CookieJar', + 'CookiePolicy', + 'DefaultCookiePolicy', + 'DefaultFactory', + 'FTPHandler', + 'Factory', + 'FileCookieJar', + 'FileHandler', + 'FormNotFoundError', + 'FormsFactory', + 'GopherError', + 'GopherHandler', + 'HTTPBasicAuthHandler', + 'HTTPCookieProcessor', + 'HTTPDefaultErrorHandler', + 'HTTPDigestAuthHandler', + 'HTTPEquivProcessor', + 'HTTPError', + 'HTTPErrorProcessor', + 'HTTPHandler', + 'HTTPPasswordMgr', + 'HTTPPasswordMgrWithDefaultRealm', + 'HTTPProxyPasswordMgr', + 'HTTPRedirectDebugProcessor', + 'HTTPRedirectHandler', + 'HTTPRefererProcessor', + 'HTTPRefreshProcessor', + 'HTTPRequestUpgradeProcessor', + 'HTTPResponseDebugProcessor', + 'HTTPRobotRulesProcessor', + 'HTTPSClientCertMgr', + 'HTTPSHandler', + 'HeadParser', + 'History', + 'LWPCookieJar', + 'Link', + 'LinkNotFoundError', + 'LinksFactory', + 'LoadError', + 'MSIECookieJar', + 'MozillaCookieJar', + 'OpenerDirector', + 'OpenerFactory', + 'ProxyBasicAuthHandler', + 'ProxyDigestAuthHandler', + 'ProxyHandler', + 'Request', + 'ResponseUpgradeProcessor', + 
'RobotExclusionError', + 'RobustFactory', + 'RobustFormsFactory', + 'RobustLinksFactory', + 'RobustTitleFactory', + 'SeekableProcessor', + 'TitleFactory', + 'URLError', + 'USE_BARE_EXCEPT', + 'UnknownHandler', + 'UserAgent', + 'XHTMLCompatibleHeadParser', + '__version__', + 'build_opener', + 'install_opener', + 'lwp_cookie_str', + 'make_response', + 'request_host', + 'response_seek_wrapper', + 'str2time', + 'urlopen', + 'urlretrieve'] + from _mechanize import __version__ # high-level stateful browser-style interface From jjlee at codespeak.net Sat Sep 2 23:11:10 2006 From: jjlee at codespeak.net (jjlee at codespeak.net) Date: Sat, 2 Sep 2006 23:11:10 +0200 (CEST) Subject: [wwwsearch-commits] r31962 - wwwsearch/mechanize/trunk Message-ID: <20060902211110.F0BB21006E@code0.codespeak.net> Author: jjlee Date: Sat Sep 2 23:11:08 2006 New Revision: 31962 Modified: wwwsearch/mechanize/trunk/doc.html.in Log: Fix closing tag Modified: wwwsearch/mechanize/trunk/doc.html.in ============================================================================== --- wwwsearch/mechanize/trunk/doc.html.in (original) +++ wwwsearch/mechanize/trunk/doc.html.in Sat Sep 2 23:11:08 2006 @@ -479,7 +479,7 @@ """)}
  • If you're using a urllib2.Request from Python 2.4 or later, - or you're using a mechanize.Request, use the + or you're using a mechanize.Request, use the unverifiable and origin_req_host arguments to the constructor: From jjlee at codespeak.net Fri Sep 8 23:19:28 2006 From: jjlee at codespeak.net (jjlee at codespeak.net) Date: Fri, 8 Sep 2006 23:19:28 +0200 (CEST) Subject: [wwwsearch-commits] r32090 - wwwsearch/mechanize/trunk Message-ID: <20060908211928.AB2DA10077@code0.codespeak.net> Author: jjlee Date: Fri Sep 8 23:19:26 2006 New Revision: 32090 Modified: wwwsearch/mechanize/trunk/doc.html.in Log: Fix bad HTML Modified: wwwsearch/mechanize/trunk/doc.html.in ============================================================================== --- wwwsearch/mechanize/trunk/doc.html.in (original) +++ wwwsearch/mechanize/trunk/doc.html.in Fri Sep 8 23:19:26 2006 @@ -718,7 +718,7 @@ keep compatibility with the Netscape protocol as implemented by Netscape. Microsoft Internet Explorer (MSIE) was very new when the standard was designed, but was starting to be very popular when the standard was finalised. XXX P3P, -and MSIE & Mozilla options +and MSIE & Mozilla options

    XXX Apparently MSIE implements bits of RFC 2109 - but not very compliant (surprise). Presumably other browsers do too, as a result. mechanize @@ -863,7 +863,7 @@
    Examples
    -Mozilla & MSIE
    +Mozilla &amp; MSIE
    Cookies in a file
    Using a CookieJar
    Processors
    From jjlee at codespeak.net Thu Sep 14 00:52:18 2006 From: jjlee at codespeak.net (jjlee at codespeak.net) Date: Thu, 14 Sep 2006 00:52:18 +0200 (CEST) Subject: [wwwsearch-commits] r32288 - wwwsearch/mechanize/trunk Message-ID: <20060913225218.98A811007B@code0.codespeak.net> Author: jjlee Date: Thu Sep 14 00:52:17 2006 New Revision: 32288 Modified: wwwsearch/mechanize/trunk/functional_tests.py Log: Comment out log printing in functional tests Modified: wwwsearch/mechanize/trunk/functional_tests.py ============================================================================== --- wwwsearch/mechanize/trunk/functional_tests.py (original) +++ wwwsearch/mechanize/trunk/functional_tests.py Thu Sep 14 00:52:17 2006 @@ -18,10 +18,10 @@ #from mechanize import CreateBSDDBCookieJar -import logging -logger = logging.getLogger("mechanize") -logger.addHandler(logging.StreamHandler()) -logger.setLevel(logging.DEBUG) +## import logging +## logger = logging.getLogger("mechanize") +## logger.addHandler(logging.StreamHandler()) +## logger.setLevel(logging.DEBUG) def sanepathname2url(path): From jjlee at codespeak.net Thu Sep 14 00:53:07 2006 From: jjlee at codespeak.net (jjlee at codespeak.net) Date: Thu, 14 Sep 2006 00:53:07 +0200 (CEST) Subject: [wwwsearch-commits] r32289 - wwwsearch/mechanize/trunk/mechanize Message-ID: <20060913225307.18CD61007B@code0.codespeak.net> Author: jjlee Date: Thu Sep 14 00:53:06 2006 New Revision: 32289 Modified: wwwsearch/mechanize/trunk/mechanize/_response.py Log: Correct over-enthusiastic guarantees of closeable_response Modified: wwwsearch/mechanize/trunk/mechanize/_response.py ============================================================================== --- wwwsearch/mechanize/trunk/mechanize/_response.py (original) +++ wwwsearch/mechanize/trunk/mechanize/_response.py Thu Sep 14 00:53:06 2006 @@ -245,9 +245,6 @@ .read() .readline() - .readlines() - .seek() - .tell() .info() .geturl() .__iter__() @@ -282,10 +279,8 @@ self.fileno = 
self.fp.fileno else: self.fileno = lambda: None - if hasattr(self.fp, "__iter__"): - self.__iter__ = self.fp.__iter__ - if hasattr(self.fp, "next"): - self.next = self.fp.next + self.__iter__ = self.fp.__iter__ + self.next = self.fp.next def __repr__(self): return '<%s at %s whose fp = %r>' % ( From jjlee at codespeak.net Thu Sep 14 02:19:14 2006 From: jjlee at codespeak.net (jjlee at codespeak.net) Date: Thu, 14 Sep 2006 02:19:14 +0200 (CEST) Subject: [wwwsearch-commits] r32290 - wwwsearch/mechanize/trunk/mechanize Message-ID: <20060914001914.6E8E710080@code0.codespeak.net> Author: jjlee Date: Thu Sep 14 02:19:12 2006 New Revision: 32290 Modified: wwwsearch/mechanize/trunk/mechanize/_mechanize.py Log: Temporary work-around for problem with partially-read responses in History (duncan.booth at suttoncourtenay.org.uk) Modified: wwwsearch/mechanize/trunk/mechanize/_mechanize.py ============================================================================== --- wwwsearch/mechanize/trunk/mechanize/_mechanize.py (original) +++ wwwsearch/mechanize/trunk/mechanize/_mechanize.py Thu Sep 14 02:19:12 2006 @@ -168,6 +168,14 @@ ## # acceptable. ## raise self.set_response(response) + + # XXX + # Temporary hack to eagerly read data (otherwise, History can contain + # closed and partially-read responses). Proper fix is for responses to + # know if they're partially read or not; .back() should then .reload() + # if required. 
+ response.get_data() + if not success: raise error return copy.copy(self._response) From jjlee at codespeak.net Thu Sep 14 02:57:51 2006 From: jjlee at codespeak.net (jjlee at codespeak.net) Date: Thu, 14 Sep 2006 02:57:51 +0200 (CEST) Subject: [wwwsearch-commits] r32291 - wwwsearch/mechanize/trunk/test Message-ID: <20060914005751.F40E410080@code0.codespeak.net> Author: jjlee Date: Thu Sep 14 02:57:50 2006 New Revision: 32291 Modified: wwwsearch/mechanize/trunk/test/test_mechanize.py Log: Fix test failure Modified: wwwsearch/mechanize/trunk/test/test_mechanize.py ============================================================================== --- wwwsearch/mechanize/trunk/test/test_mechanize.py (original) +++ wwwsearch/mechanize/trunk/test/test_mechanize.py Thu Sep 14 02:57:50 2006 @@ -96,7 +96,7 @@ self.assertEqual(got, expect) -# XXX these 'mock' classes are badly in need of simplification +# XXX these 'mock' classes are badly in need of simplification / removal class MockMethod: def __init__(self, meth_name, action, handle): self.meth_name = meth_name @@ -125,6 +125,7 @@ assert whence == 0 self.fp.seek(0) def close(self): pass + def get_data(self): pass def __getstate__(self): state = self.__dict__ state['source'] = self.source From jjlee at codespeak.net Sun Sep 17 00:09:45 2006 From: jjlee at codespeak.net (jjlee at codespeak.net) Date: Sun, 17 Sep 2006 00:09:45 +0200 (CEST) Subject: [wwwsearch-commits] r32397 - wwwsearch/mechanize/trunk/mechanize Message-ID: <20060916220945.CA6B710071@code0.codespeak.net> Author: jjlee Date: Sun Sep 17 00:09:44 2006 New Revision: 32397 Modified: wwwsearch/mechanize/trunk/mechanize/_mechanize.py wwwsearch/mechanize/trunk/mechanize/_response.py Log: HTTPError didn't support .get_data() or .seek() -- fix that by upgrading it to a closeable_response; Don't treat any non-response HTTPErrors as responses (not sure if any of these '.fp is None' HTTPErrors are actually raised any more, but urllib2.HTTPError's c'tor knows about this 
case); Apply eager-read workaround to .set_response() (not just .open()); Don't multiple-.seek() wrap Modified: wwwsearch/mechanize/trunk/mechanize/_mechanize.py ============================================================================== --- wwwsearch/mechanize/trunk/mechanize/_mechanize.py (original) +++ wwwsearch/mechanize/trunk/mechanize/_mechanize.py Sun Sep 17 00:09:44 2006 @@ -155,6 +155,8 @@ response = UserAgent.open(self, self.request, data) except urllib2.HTTPError, error: success = False + if error.fp is None: # not a response + raise response = error ## except (IOError, socket.error, OSError), error: ## # Yes, urllib2 really does raise all these :-(( @@ -169,16 +171,11 @@ ## raise self.set_response(response) - # XXX - # Temporary hack to eagerly read data (otherwise, History can contain - # closed and partially-read responses). Proper fix is for responses to - # know if they're partially read or not; .back() should then .reload() - # if required. - response.get_data() + response = copy.copy(self._response) if not success: - raise error - return copy.copy(self._response) + raise response + return response def __str__(self): text = [] @@ -209,15 +206,15 @@ raise ValueError("not a response object") self.form = None + self._response = _upgrade.upgrade_response(response) - if not hasattr(response, "seek"): - response = response_seek_wrapper(response) - if not hasattr(response, "closeable_response"): - response = _upgrade.upgrade_response(response) - else: - response = copy.copy(response) + # XXX + # Temporary hack to eagerly read data (otherwise, History can contain + # closed and partially-read responses). Proper fix is for responses to + # know if they're partially read or not; .back() should then .reload() + # if required. 
+ self._response.get_data() - self._response = response self._factory.set_response(self._response) def geturl(self): Modified: wwwsearch/mechanize/trunk/mechanize/_response.py ============================================================================== --- wwwsearch/mechanize/trunk/mechanize/_response.py (original) +++ wwwsearch/mechanize/trunk/mechanize/_response.py Sun Sep 17 00:09:44 2006 @@ -10,6 +10,7 @@ import copy, mimetools from cStringIO import StringIO +from urllib2 import HTTPError # XXX Andrew Dalke kindly sent me a similar class in response to my request on # comp.lang.python, which I then proceeded to lose. I wrote this class @@ -218,6 +219,27 @@ self.seek(0) +class httperror_seek_wrapper(response_seek_wrapper, HTTPError): + + # this only derives from HTTPError in order to be a subclass -- + # the HTTPError behaviour comes from delegation + + def __init__(self, wrapped): + assert isinstance(wrapped, closeable_response), wrapped + response_seek_wrapper.__init__(self, wrapped) + # be compatible with undocumented HTTPError attributes :-( + self.hdrs = wrapped._headers + self.filename = wrapped._url + + # we don't want the HTTPError implementation of these + + def geturl(self): + return self.wrapped.geturl() + + def close(self): + self.wrapped.close() + + class eoffile: # file-like object that always claims to be at end-of-file... def read(self, size=-1): return "" @@ -338,12 +360,27 @@ r = closeable_response(StringIO(data), mime_headers, url, code, msg) return response_seek_wrapper(r) + # Horrible, but needed, at least until fork urllib2. Even then, may want # to preseve urllib2 compatibility. def upgrade_response(response): + """Return a copy of response that supports mechanize response interface. + + Accepts responses from both mechanize and urllib2 handlers. 
+ """ + if isinstance(response, HTTPError): + wrapper_class = httperror_seek_wrapper + else: + wrapper_class = response_seek_wrapper + + if hasattr(response, "closeable_response"): + if not hasattr(response, "seek"): + response = wrapper_class(response) + return copy.copy(response) + # a urllib2 handler constructed the response, i.e. the response is an - # urllib.addinfourl, instead of a _Util.closeable_response as returned - # by e.g. mechanize.HTTPHandler + # urllib.addinfourl or a urllib2.HTTPError, instead of a + # _Util.closeable_response as returned by e.g. mechanize.HTTPHandler try: code = response.code except AttributeError: @@ -361,7 +398,7 @@ response = closeable_response( response.fp, response.info(), response.geturl(), code, msg) - response = response_seek_wrapper(response) + response = wrapper_class(response) if data: response.set_data(data) return response From jjlee at codespeak.net Sun Sep 17 00:12:12 2006 From: jjlee at codespeak.net (jjlee at codespeak.net) Date: Sun, 17 Sep 2006 00:12:12 +0200 (CEST) Subject: [wwwsearch-commits] r32398 - wwwsearch/mechanize/trunk/mechanize Message-ID: <20060916221212.B9CA610071@code0.codespeak.net> Author: jjlee Date: Sun Sep 17 00:12:11 2006 New Revision: 32398 Modified: wwwsearch/mechanize/trunk/mechanize/_html.py Log: Fix harmless but confusing typo Modified: wwwsearch/mechanize/trunk/mechanize/_html.py ============================================================================== --- wwwsearch/mechanize/trunk/mechanize/_html.py (original) +++ wwwsearch/mechanize/trunk/mechanize/_html.py Sun Sep 17 00:12:11 2006 @@ -546,7 +546,7 @@ self._forms_factory.set_response( copy.copy(response), self.encoding) self._links_factory.set_response( - copy.copy(response), self._response.geturl(), self.encoding) + copy.copy(response), response.geturl(), self.encoding) self._title_factory.set_response( copy.copy(response), self.encoding) From jjlee at codespeak.net Sun Sep 17 01:32:13 2006 From: jjlee at codespeak.net (jjlee at 
codespeak.net) Date: Sun, 17 Sep 2006 01:32:13 +0200 (CEST) Subject: [wwwsearch-commits] r32399 - wwwsearch/mechanize/trunk/mechanize Message-ID: <20060916233213.DAB5510071@code0.codespeak.net> Author: jjlee Date: Sun Sep 17 01:32:09 2006 New Revision: 32399 Modified: wwwsearch/mechanize/trunk/mechanize/_response.py Log: Fix catching HTTPError subclasses while still preserving all their response behaviour, with some somewhat-evil runtime class creation trickery Modified: wwwsearch/mechanize/trunk/mechanize/_response.py ============================================================================== --- wwwsearch/mechanize/trunk/mechanize/_response.py (original) +++ wwwsearch/mechanize/trunk/mechanize/_response.py Sun Sep 17 01:32:09 2006 @@ -10,7 +10,7 @@ import copy, mimetools from cStringIO import StringIO -from urllib2 import HTTPError +import urllib2 # XXX Andrew Dalke kindly sent me a similar class in response to my request on # comp.lang.python, which I then proceeded to lose. I wrote this class @@ -219,27 +219,6 @@ self.seek(0) -class httperror_seek_wrapper(response_seek_wrapper, HTTPError): - - # this only derives from HTTPError in order to be a subclass -- - # the HTTPError behaviour comes from delegation - - def __init__(self, wrapped): - assert isinstance(wrapped, closeable_response), wrapped - response_seek_wrapper.__init__(self, wrapped) - # be compatible with undocumented HTTPError attributes :-( - self.hdrs = wrapped._headers - self.filename = wrapped._url - - # we don't want the HTTPError implementation of these - - def geturl(self): - return self.wrapped.geturl() - - def close(self): - self.wrapped.close() - - class eoffile: # file-like object that always claims to be at end-of-file... def read(self, size=-1): return "" @@ -368,7 +347,25 @@ Accepts responses from both mechanize and urllib2 handlers. 
""" - if isinstance(response, HTTPError): + if isinstance(response, urllib2.HTTPError): + class httperror_seek_wrapper(response_seek_wrapper, response.__class__): + # this only derives from HTTPError in order to be a subclass -- + # the HTTPError behaviour comes from delegation + + def __init__(self, wrapped): + assert isinstance(wrapped, closeable_response), wrapped + response_seek_wrapper.__init__(self, wrapped) + # be compatible with undocumented HTTPError attributes :-( + self.hdrs = wrapped._headers + self.filename = wrapped._url + + # we don't want the HTTPError implementation of these + + def geturl(self): + return self.wrapped.geturl() + + def close(self): + self.wrapped.close() wrapper_class = httperror_seek_wrapper else: wrapper_class = response_seek_wrapper From jjlee at codespeak.net Mon Sep 18 00:19:05 2006 From: jjlee at codespeak.net (jjlee at codespeak.net) Date: Mon, 18 Sep 2006 00:19:05 +0200 (CEST) Subject: [wwwsearch-commits] r32429 - wwwsearch/release_scripts Message-ID: <20060917221905.1551D10071@code0.codespeak.net> Author: jjlee Date: Mon Sep 18 00:19:03 2006 New Revision: 32429 Added: wwwsearch/release_scripts/restextensions.py Log: Initial import of some reST extensions for use by new docs (mostly pinched from other people's code) Added: wwwsearch/release_scripts/restextensions.py ============================================================================== --- (empty file) +++ wwwsearch/release_scripts/restextensions.py Mon Sep 18 00:19:03 2006 @@ -0,0 +1,263 @@ +# Modified syntax-highlighting directive support taken from Michael Alyn +# Miller's restxsl. The actual syntax highlighting code itself has been +# replaced. +# XXX +# This is still in a semi-working state +# mech_name &c. is pinched from MochiKit and does not work yet + +# Copyright (c) 2006, Michael Alyn Miller . +# All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# 1. Redistributions of source code must retain the above copyright +# notice unmodified, this list of conditions, and the following +# disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# 3. Neither the name of Michael Alyn Miller nor the names of the +# contributors to this software may be used to endorse or promote +# products derived from this software without specific prior written +# permission. +# +# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS "AS IS" AND ANY +# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +# THE POSSIBILITY OF SUCH DAMAGE. 
+ +import os, sys, imp + +import docutils.core +import docutils.nodes +import docutils.utils +import docutils.parsers.rst + +from docutils.core import publish_parts +from docutils.parsers.rst import roles + +TEMPLATE = u"""%(html_prolog)s + + +%(html_head)s + + + +%(html_body)s + + +""" +def mech_name(text): + name = text.split('(', 1)[0].split()[0] + base = '' + if name.startswith('MochiKit.'): + # cross-reference + parts = name.split('.') + base = parts[1] + '.html' + if parts[-1] in ("call", "apply"): + parts.pop() + name = '.'.join(parts[2:]) + return base, name + +def role_ref(role, rawtext, text, lineno, inliner, options=None, content=[]): + if options is None: + options = {} + base, name = mech_name(text) + ref = base + if name: + ref += '#fn-' + name.lower() + roles.set_classes(options) + options.setdefault('classes', []).append('ref') + node = docutils.nodes.reference( + text, docutils.utils.unescape(text), refuri=ref, **options) + return [node], [] + +def role_def(role, rawtext, text, lineno, inliner, options=None, content=[]): + if options is None: + options = {} + base, name = mech_name(text) + assert base == '' + ref = 'fn-' + docutils.utils.unescape(name.lower()) + anchor = docutils.nodes.raw('', '\n\n' % (ref,), format='html') + roles.set_classes(options) + options.setdefault('classes', []).append('def') + node = docutils.nodes.reference( + text, utils.unescape(text), refuri='#' + ref, **options) + return [anchor, node], [] + +def code_block_directive(name, arguments, options, content, lineno, + content_offset, block_text, state, state_machine): + """ + Provides syntax highlighting for blocks of code. It is used with + the following syntax:: + + .. code-block:: python + + import sys + + def main(argv): + print "Hello world" + + if __name__ == "__main__": + main(sys.argv) + + + The directive requires the name of a language (case-insensitive) as its + only argument. Currently only Python is supported. 
All code in the + indented block following the directive will be colorized. + + The directive can also be told to include a source file directly:: + + .. code-block:: + :language: Python + :source-file: ../myfile.py + + You cannot both specify a source-file and include code directly. + """ + from colorize import colorize_ex + + # Get the language name. Try the arguments first, then fall back to + # the options. + try: + language = arguments[0] + except IndexError: + language = options['language'] + + # The user cannot specify content and give us a source-file, they + # must choose one or the other. + if content and 'source-file' in options: + error = state_machine.reporter.error( + 'Must specify a source-file or provide content, not both.', + docutils.nodes.literal_block(block_text, block_text), + line=lineno) + return [error] + + # Load the content from a file if we were not given any content. + if not content: + try: + sourceFile = state_machine.input_lines.source( + lineno - state_machine.input_offset - 1) + sourceDir = os.path.dirname(os.path.abspath(sourceFile)) + + path = options['source-file'] + path = os.path.normpath(os.path.join(sourceDir, path)) + path = docutils.utils.relative_path(None, path) + + state.document.settings.record_dependencies.add(path) + + content = [line.rstrip() for line in file(path)] + except IOError: + error = state_machine.reporter.error( + 'Could not read file %s.' % (path), + docutils.nodes.literal_block(block_text, block_text), + line=lineno) + return [error] + + if language.lower() != "python": + error = state_machine.reporter.error( + 'No lexer found for language "%s".' % (language), + docutils.nodes.literal_block(block_text, block_text), + line=lineno) + return [error] + + # Render the content to HTML. The HTML is wrapped in a .. + # block. 
+ formatted = [] + formatted.append('') + formatted.append(colorize_ex('\n'.join(content))) + formatted.append('\n') + + # Enclose the rendered HTML in a raw docutils node and return the + # node. + raw = docutils.nodes.raw('', "\n".join(formatted), format='html') + return [raw] + +# Configure the code-block directive. +code_block_directive.arguments = (0, 2, True) +code_block_directive.options = { + 'language': docutils.parsers.rst.directives.unchanged, + 'source-file': docutils.parsers.rst.directives.path, +} +code_block_directive.content = True + +def register(): + from colorize import colorize_ex + docutils.parsers.rst.directives.register_directive( + 'code-block', code_block_directive) + roles.register_canonical_role('ref', role_ref) + roles.register_canonical_role('def', role_def) + + +def rst2html_main(): + try: + import locale + locale.setlocale(locale.LC_ALL, '') + except: + pass + + register() + + description = ('Generates (X)HTML documents from standalone ' + 'reStructuredText sources. 
' + + docutils.core.default_description) + docutils.core.publish_cmdline(writer_name='html', + description=description) + +def rst2html_main_ex(): + try: + import locale + locale.setlocale(locale.LC_ALL, '') + except: + pass + + register() + + basepath = os.path.join('doc/rst', '') + destpath = os.path.join('doc/html', '') + for root, dirs, files in os.walk(basepath): + if '.svn' in dirs: + dirs.remove('.svn') + destroot = destpath + root[len(basepath):] + if not os.path.exists(destroot): + os.makedirs(destroot) + for fn in files: + basefn, ext = os.path.splitext(fn) + if ext == '.rst': + srcfn = os.path.join(root, fn) + dest = os.path.join(destroot, basefn + '.html') + if basefn != "index": + try: + if os.path.getmtime(dest) >= os.path.getmtime(srcfn): + print srcfn, "not changed" + continue + except OSError: + pass + print srcfn + parts = publish_parts( + source_path=srcfn, + source=open(srcfn, 'rb').read().decode('utf8'), + destination_path=dest, + writer_name='html', + settings_overrides=dict( + embed_stylesheet=False, + stylesheet_path='include/css/documentation.css', + ), + ) + parts['html_head'] = parts['html_head'] % ('utf-8',) + parts['html_prolog'] = parts['html_prolog'] % ('utf-8',) + doc = (TEMPLATE % parts).encode("utf8") + out = open(dest, 'wb') + out.write(doc) + out.close() + +if __name__ == '__main__': + rst2html_main() From jjlee at codespeak.net Sat Sep 23 17:52:59 2006 From: jjlee at codespeak.net (jjlee at codespeak.net) Date: Sat, 23 Sep 2006 17:52:59 +0200 (CEST) Subject: [wwwsearch-commits] r32608 - in wwwsearch/mechanize/trunk: . 
mechanize test Message-ID: <20060923155259.4507510060@code0.codespeak.net> Author: jjlee Date: Sat Sep 23 17:52:52 2006 New Revision: 32608 Modified: wwwsearch/mechanize/trunk/functional_tests.py wwwsearch/mechanize/trunk/mechanize/__init__.py wwwsearch/mechanize/trunk/mechanize/_opener.py wwwsearch/mechanize/trunk/mechanize/_response.py wwwsearch/mechanize/trunk/mechanize/_urllib2.py wwwsearch/mechanize/trunk/test/test_mechanize.py Log: Fix OpenerDirector.retrieve(), which was very broken (duncan.booth at suttoncourtenay.org.uk) Modified: wwwsearch/mechanize/trunk/functional_tests.py ============================================================================== --- wwwsearch/mechanize/trunk/functional_tests.py (original) +++ wwwsearch/mechanize/trunk/functional_tests.py Sat Sep 23 17:52:52 2006 @@ -197,17 +197,34 @@ def test_urlretrieve(self): url = "http://www.python.org/" - verif = CallbackVerifier(self) - fn, hdrs = urlretrieve(url, "python.html", verif.callback) - try: - f = open(fn) + test_filename = "python.html" + def check_retrieve(opener, filename, headers): + self.assertEqual(headers.get('Content-Type'), 'text/html') + f = open(filename) data = f.read() f.close() + opener.close() + from urllib import urlopen + r = urlopen(url) + self.assertEqual(data, r.read()) + r.close() + + opener = mechanize.build_opener() + verif = CallbackVerifier(self) + filename, headers = opener.retrieve(url, test_filename, verif.callback) + try: + self.assertEqual(filename, test_filename) + check_retrieve(opener, filename, headers) + self.assert_(os.path.isfile(filename)) finally: - os.remove(fn) - r = urlopen(url) - self.assert_(data == r.read()) - r.close() + os.remove(filename) + + opener = mechanize.build_opener() + verif = CallbackVerifier(self) + filename, headers = opener.retrieve(url, reporthook=verif.callback) + check_retrieve(opener, filename, headers) + # closing the opener removed the temporary file + self.failIf(os.path.isfile(filename)) ## def test_cacheftp(self): 
## from urllib2 import CacheFTPHandler, build_opener @@ -226,8 +243,7 @@ self._count = 0 self._testcase = testcase def callback(self, block_nr, block_size, total_size): - if block_nr != self._count: - self._testcase.fail() + self._testcase.assertEqual(block_nr, self._count) self._count = self._count + 1 Modified: wwwsearch/mechanize/trunk/mechanize/__init__.py ============================================================================== --- wwwsearch/mechanize/trunk/mechanize/__init__.py (original) +++ wwwsearch/mechanize/trunk/mechanize/__init__.py Sat Sep 23 17:52:52 2006 @@ -5,6 +5,7 @@ 'Browser', 'BrowserStateError', 'CacheFTPHandler', + 'ContentTooShortError', 'Cookie', 'CookieJar', 'CookiePolicy', @@ -97,6 +98,7 @@ from _urllib2 import * # misc +from _opener import ContentTooShortError, OpenerFactory, urlretrieve from _util import http2time as str2time from _response import response_seek_wrapper, make_response from _http import HeadParser Modified: wwwsearch/mechanize/trunk/mechanize/_opener.py ============================================================================== --- wwwsearch/mechanize/trunk/mechanize/_opener.py (original) +++ wwwsearch/mechanize/trunk/mechanize/_opener.py Sat Sep 23 17:52:52 2006 @@ -9,7 +9,7 @@ """ -import urllib2, bisect, urlparse, httplib, types +import os, urllib2, bisect, urllib, urlparse, httplib, types, tempfile try: import threading as _threading except ImportError: @@ -26,6 +26,12 @@ from _request import Request +class ContentTooShortError(urllib2.URLError): + def __init__(self, reason, result): + urllib2.URLError.__init__(self, reason) + self.result = result + + class OpenerDirector(urllib2.OpenerDirector): def __init__(self): urllib2.OpenerDirector.__init__(self) @@ -36,6 +42,7 @@ self._any_request = {} self._any_response = {} self._handler_index_valid = True + self._tempfiles = [] def add_handler(self, handler): if handler in self.handlers: @@ -198,52 +205,85 @@ args = (dict, 'default', 'http_error_default') + 
orig_args return apply(self._call_chain, args) + BLOCK_SIZE = 1024*8 def retrieve(self, fullurl, filename=None, reporthook=None, data=None): """Returns (filename, headers). For remote objects, the default filename will refer to a temporary - file. + file. Temporary files are removed when the OpenerDirector.close() + method is called. + + For file: URLs, at present the returned filename is None. This may + change in future. + + If the actual number of bytes read is less than indicated by the + Content-Length header, raises ContentTooShortError (a URLError + subclass). The exception's .result attribute contains the (filename, + headers) that would have been returned. """ req = self._request(fullurl, data) - type_ = req.get_type() + scheme = req.get_type() fp = self.open(req) headers = fp.info() - if filename is None and type == 'file': - return url2pathname(req.get_selector()), headers + if filename is None and scheme == 'file': + # XXX req.get_selector() seems broken here, return None, + # pending sanity :-/ + return None, headers + #return urllib.url2pathname(req.get_selector()), headers if filename: tfp = open(filename, 'wb') else: - path = urlparse(fullurl)[2] + path = urlparse.urlparse(fullurl)[2] suffix = os.path.splitext(path)[1] - tfp = tempfile.TemporaryFile("wb", suffix=suffix) + fd, filename = tempfile.mkstemp(suffix) + self._tempfiles.append(filename) + tfp = os.fdopen(fd, 'wb') + result = filename, headers - bs = 1024*8 + bs = self.BLOCK_SIZE size = -1 read = 0 - blocknum = 1 + blocknum = 0 if reporthook: - if headers.has_key("content-length"): + if "content-length" in headers: size = int(headers["Content-Length"]) - reporthook(0, bs, size) + reporthook(blocknum, bs, size) while 1: block = fp.read(bs) + if block == "": + break read += len(block) + tfp.write(block) + blocknum += 1 if reporthook: reporthook(blocknum, bs, size) - blocknum = blocknum + 1 - if not block: - break - tfp.write(block) fp.close() tfp.close() del fp del tfp - if size>=0 and read= 0 
and read < size: + raise ContentTooShortError( + "retrieval incomplete: " + "got only %i out of %i bytes" % (read, size), + result + ) + return result + def close(self): + urllib2.OpenerDirector.close(self) + + if self._tempfiles: + for filename in self._tempfiles: + try: + os.unlink(filename) + except OSError: + pass + del self._tempfiles[:] + class OpenerFactory: """This class's interface is quite likely to change.""" Modified: wwwsearch/mechanize/trunk/mechanize/_response.py ============================================================================== --- wwwsearch/mechanize/trunk/mechanize/_response.py (original) +++ wwwsearch/mechanize/trunk/mechanize/_response.py Sat Sep 23 17:52:52 2006 @@ -322,6 +322,10 @@ state["wrapped"] = new_wrapped return state +def test_response(data, headers, + url="http://example.com/", code=200, msg="OK"): + return make_response(data, headers, url, code, msg) + def make_response(data, headers, url, code, msg): """Convenient factory for objects implementing response interface. @@ -332,12 +336,18 @@ msg: string response code message (e.g. "OK") """ + mime_headers = make_headers(headers) + r = closeable_response(StringIO(data), mime_headers, url, code, msg) + return response_seek_wrapper(r) + +def make_headers(headers): + """ + headers: sequence of (name, value) pairs + """ hdr_text = [] for name_value in headers: hdr_text.append("%s: %s" % name_value) - mime_headers = mimetools.Message(StringIO("\n".join(hdr_text))) - r = closeable_response(StringIO(data), mime_headers, url, code, msg) - return response_seek_wrapper(r) + return mimetools.Message(StringIO("\n".join(hdr_text))) # Horrible, but needed, at least until fork urllib2. 
Even then, may want Modified: wwwsearch/mechanize/trunk/mechanize/_urllib2.py ============================================================================== --- wwwsearch/mechanize/trunk/mechanize/_urllib2.py (original) +++ wwwsearch/mechanize/trunk/mechanize/_urllib2.py Sat Sep 23 17:52:52 2006 @@ -6,8 +6,7 @@ GopherError # ...and from mechanize from _opener import OpenerDirector, \ - build_opener, install_opener, urlopen, \ - OpenerFactory, urlretrieve + build_opener, install_opener, urlopen from _auth import \ HTTPPasswordMgr, \ HTTPPasswordMgrWithDefaultRealm, \ Modified: wwwsearch/mechanize/trunk/test/test_mechanize.py ============================================================================== --- wwwsearch/mechanize/trunk/test/test_mechanize.py (original) +++ wwwsearch/mechanize/trunk/test/test_mechanize.py Sat Sep 23 17:52:52 2006 @@ -1,6 +1,6 @@ #!/usr/bin/env python -import sys, random +import sys, os, random, math from unittest import TestCase import StringIO, re, UserDict, urllib2 @@ -15,6 +15,18 @@ FACTORY_CLASSES.append(mechanize.RobustFactory) +def killfile(filename): + try: + os.remove(filename) + except OSError: + if os.name=='nt': + try: + os.chmod(arg, stat.S_IWRITE) + os.remove(arg) + except OSError: + pass + + class RegressionTests(TestCase): def test_close_base_tag(self): @@ -183,6 +195,165 @@ default_schemes = [] +class OpenerTests(TestCase): + + def test_retrieve(self): + # The .retrieve() method deals with a number of different cases. In + # each case, .read() should be called the expected number of times, the + # progress callback should be called as expected, and we should end up + # with a filename and some headers. 
+ + class Opener(mechanize.OpenerDirector): + def __init__(self, content_length=None): + mechanize.OpenerDirector.__init__(self) + self.calls = [] + self.block_size = mechanize.OpenerDirector.BLOCK_SIZE + self.nr_blocks = 2.5 + self.data = int((self.block_size/8)*self.nr_blocks)*"01234567" + self.total_size = len(self.data) + self._content_length = content_length + def open(self, fullurl, data=None): + from mechanize import _response + self.calls.append((fullurl, data)) + headers = [("Foo", "Bar")] + if self._content_length is not None: + if self._content_length is True: + content_length = str(len(self.data)) + else: + content_length = str(self._content_length) + headers.append(("content-length", content_length)) + return _response.test_response(self.data, headers) + + class CallbackVerifier: + def __init__(self, testcase, total_size, block_size): + self.count = 0 + self._testcase = testcase + self._total_size = total_size + self._block_size = block_size + def callback(self, block_nr, block_size, total_size): + self._testcase.assertEqual(block_nr, self.count) + self._testcase.assertEqual(block_size, self._block_size) + self._testcase.assertEqual(total_size, self._total_size) + self.count += 1 + + # ensure we start without the test file present + tfn = "mechanize_test_73940ukewrl.txt" + killfile(tfn) + + # case 1: filename supplied + op = Opener() + verif = CallbackVerifier(self, -1, op.block_size) + url = "http://example.com/" + try: + filename, headers = op.retrieve( + url, tfn, reporthook=verif.callback) + self.assertEqual(filename, tfn) + self.assertEqual(headers["foo"], 'Bar') + self.assertEqual(open(filename, "rb").read(), op.data) + self.assertEqual(len(op.calls), 1) + self.assertEqual(verif.count, math.ceil(op.nr_blocks) + 1) + op.close() + # .close()ing the opener does NOT remove non-temporary files + self.assert_(os.path.isfile(filename)) + finally: + killfile(filename) + + # case 2: no filename supplied, use a temporary file + op = 
Opener(content_length=True) + # We asked the Opener to add a content-length header to the response + # this time. Verify the total size passed to the callback is that case + # is according to the content-length (rather than -1). + verif = CallbackVerifier(self, op.total_size, op.block_size) + url = "http://example.com/" + filename, headers = op.retrieve(url, reporthook=verif.callback) + self.assertNotEqual(filename, tfn) # (some temp filename instead) + self.assertEqual(headers["foo"], 'Bar') + self.assertEqual(open(filename, "rb").read(), op.data) + self.assertEqual(len(op.calls), 1) + # .close()ing the opener removes temporary files + self.assert_(os.path.exists(filename)) + op.close() + self.failIf(os.path.exists(filename)) + self.assertEqual(verif.count, math.ceil(op.nr_blocks) + 1) + + # case 3: "file:" URL with no filename supplied + # we DON'T create a temporary file, since there's a file there already + op = Opener() + verif = CallbackVerifier(self, -1, op.block_size) + tifn = "input_for_"+tfn + try: + f = open(tifn, 'wb') + try: + f.write(op.data) + finally: + f.close() + url = "file://" + tifn + filename, headers = op.retrieve(url, reporthook=verif.callback) + self.assertEqual(filename, None) # this may change + self.assertEqual(headers["foo"], 'Bar') + self.assertEqual(open(tifn, "rb").read(), op.data) + # no .read()s took place, since we already have the disk file, + # and we weren't asked to write it to another filename + self.assertEqual(verif.count, 0) + op.close() + # .close()ing the opener does NOT remove the file! 
+ self.assert_(os.path.isfile(tifn)) + finally: + killfile(tifn) + + # case 4: "file:" URL and filename supplied + # we DO create a new file in this case + op = Opener() + verif = CallbackVerifier(self, -1, op.block_size) + tifn = "input_for_"+tfn + try: + f = open(tifn, 'wb') + try: + f.write(op.data) + finally: + f.close() + url = "file://" + tifn + try: + filename, headers = op.retrieve( + url, tfn, reporthook=verif.callback) + self.assertEqual(filename, tfn) + self.assertEqual(headers["foo"], 'Bar') + self.assertEqual(open(tifn, "rb").read(), op.data) + self.assertEqual(verif.count, math.ceil(op.nr_blocks) + 1) + op.close() + # .close()ing the opener does NOT remove non-temporary files + self.assert_(os.path.isfile(tfn)) + finally: + killfile(tfn) + finally: + killfile(tifn) + + # Content-Length mismatch with real file length gives URLError + big = 1024*32 + op = Opener(content_length=big) + verif = CallbackVerifier(self, big, op.block_size) + url = "http://example.com/" + try: + try: + op.retrieve(url, reporthook=verif.callback) + except mechanize.ContentTooShortError, exc: + filename, headers = exc.result + self.assertNotEqual(filename, tfn) + self.assertEqual(headers["foo"], 'Bar') + # We still read and wrote to disk everything available, despite + # the exception. 
+ self.assertEqual(open(filename, "rb").read(), op.data) + self.assertEqual(len(op.calls), 1) + self.assertEqual(verif.count, math.ceil(op.nr_blocks) + 1) + # cleanup should still take place + self.assert_(os.path.isfile(filename)) + op.close() + self.failIf(os.path.isfile(filename)) + else: + self.fail() + finally: + killfile(filename) + class BrowserTests(TestCase): def test_referer(self): b = TestBrowser() From jjlee at codespeak.net Sat Sep 23 17:54:54 2006 From: jjlee at codespeak.net (jjlee at codespeak.net) Date: Sat, 23 Sep 2006 17:54:54 +0200 (CEST) Subject: [wwwsearch-commits] r32609 - wwwsearch/mechanize/trunk/mechanize Message-ID: <20060923155454.D47D310060@code0.codespeak.net> Author: jjlee Date: Sat Sep 23 17:54:54 2006 New Revision: 32609 Modified: wwwsearch/mechanize/trunk/mechanize/_opener.py Log: Crash in a much more obvious way if trying to use OpenerDirector after .close() Modified: wwwsearch/mechanize/trunk/mechanize/_opener.py ============================================================================== --- wwwsearch/mechanize/trunk/mechanize/_opener.py (original) +++ wwwsearch/mechanize/trunk/mechanize/_opener.py Sat Sep 23 17:54:54 2006 @@ -276,6 +276,9 @@ def close(self): urllib2.OpenerDirector.close(self) + # make it very obvious this object is no longer supposed to be used + self.open = self.error = self.retrieve = self.add_handler = None + if self._tempfiles: for filename in self._tempfiles: try: From jjlee at codespeak.net Sat Sep 23 18:05:12 2006 From: jjlee at codespeak.net (jjlee at codespeak.net) Date: Sat, 23 Sep 2006 18:05:12 +0200 (CEST) Subject: [wwwsearch-commits] r32610 - wwwsearch/mechanize/trunk/test Message-ID: <20060923160512.9F99C10060@code0.codespeak.net> Author: jjlee Date: Sat Sep 23 18:05:09 2006 New Revision: 32610 Modified: wwwsearch/mechanize/trunk/test/test_mechanize.py Log: Don't crash in tests on trying to clean up temp files Windows (I hope) Modified: wwwsearch/mechanize/trunk/test/test_mechanize.py 
============================================================================== --- wwwsearch/mechanize/trunk/test/test_mechanize.py (original) +++ wwwsearch/mechanize/trunk/test/test_mechanize.py Sat Sep 23 18:05:09 2006 @@ -21,8 +21,8 @@ except OSError: if os.name=='nt': try: - os.chmod(arg, stat.S_IWRITE) - os.remove(arg) + os.chmod(filename, stat.S_IWRITE) + os.remove(filename) except OSError: pass From jjlee at codespeak.net Thu Sep 28 03:17:22 2006 From: jjlee at codespeak.net (jjlee at codespeak.net) Date: Thu, 28 Sep 2006 03:17:22 +0200 (CEST) Subject: [wwwsearch-commits] r32686 - wwwsearch/mechanize/trunk Message-ID: <20060928011722.2810C1006F@code0.codespeak.net> Author: jjlee Date: Thu Sep 28 03:17:06 2006 New Revision: 32686 Modified: wwwsearch/mechanize/trunk/test.py Log: Don't remove excluded doctests unless they exist (duncan.booth at suttoncourtenay.org.uk) Modified: wwwsearch/mechanize/trunk/test.py ============================================================================== --- wwwsearch/mechanize/trunk/test.py (original) +++ wwwsearch/mechanize/trunk/test.py Thu Sep 28 03:17:06 2006 @@ -142,8 +142,10 @@ os.path.join("test", "test_scratch.doctest"), ] doctest_files = glob.glob(os.path.join("test", "*.doctest")) + for dt in special_doctests: - doctest_files.remove(dt) + if dt in doctest_files: + doctest_files.remove(dt) for df in doctest_files: doctest.testfile(df) From jjlee at codespeak.net Thu Sep 28 03:25:25 2006 From: jjlee at codespeak.net (jjlee at codespeak.net) Date: Thu, 28 Sep 2006 03:25:25 +0200 (CEST) Subject: [wwwsearch-commits] r32687 - in wwwsearch/mechanize/trunk: . 
mechanize test Message-ID: <20060928012525.D2FDA1006F@code0.codespeak.net> Author: jjlee Date: Thu Sep 28 03:25:08 2006 New Revision: 32687 Modified: wwwsearch/mechanize/trunk/functional_tests.py wwwsearch/mechanize/trunk/mechanize/_mechanize.py wwwsearch/mechanize/trunk/mechanize/_response.py wwwsearch/mechanize/trunk/test/test_mechanize.py Log: .reload() on .back() if necessary (necessary iff response was not fully .read() on first .open()ing) Modified: wwwsearch/mechanize/trunk/functional_tests.py ============================================================================== --- wwwsearch/mechanize/trunk/functional_tests.py (original) +++ wwwsearch/mechanize/trunk/functional_tests.py Thu Sep 28 03:25:08 2006 @@ -226,6 +226,14 @@ # closing the opener removed the temporary file self.failIf(os.path.isfile(filename)) + def test_reload_read_incomplete(self): + from mechanize import Browser + browser = Browser() + browser.open("http://plone.org") + browser.open("http://plone.org/products") + browser.back() + browser.follow_link(text="About") + ## def test_cacheftp(self): ## from urllib2 import CacheFTPHandler, build_opener ## o = build_opener(CacheFTPHandler()) Modified: wwwsearch/mechanize/trunk/mechanize/_mechanize.py ============================================================================== --- wwwsearch/mechanize/trunk/mechanize/_mechanize.py (original) +++ wwwsearch/mechanize/trunk/mechanize/_mechanize.py Thu Sep 28 03:25:08 2006 @@ -207,14 +207,6 @@ self.form = None self._response = _upgrade.upgrade_response(response) - - # XXX - # Temporary hack to eagerly read data (otherwise, History can contain - # closed and partially-read responses). Proper fix is for responses to - # know if they're partially read or not; .back() should then .reload() - # if required. 
- self._response.get_data() - self._factory.set_response(self._response) def geturl(self): @@ -241,6 +233,8 @@ self._response.close() self.request, response = self._history.back(n, self._response) self.set_response(response) + if not response.read_complete: + self.reload() return response def clear_history(self): Modified: wwwsearch/mechanize/trunk/mechanize/_response.py ============================================================================== --- wwwsearch/mechanize/trunk/mechanize/_response.py (original) +++ wwwsearch/mechanize/trunk/mechanize/_response.py Thu Sep 28 03:25:08 2006 @@ -50,6 +50,7 @@ def __init__(self, wrapped): self.wrapped = wrapped + self.__read_complete_state = [False] self.__have_readline = hasattr(self.wrapped, "readline") self.__cache = StringIO() self.__pos = 0 # seek position @@ -60,11 +61,21 @@ return self.wrapped.tell() == len(self.__cache.getvalue()) def __getattr__(self, name): + if name == "read_complete": + return self.__read_complete_state[0] + wrapped = self.__dict__.get("wrapped") if wrapped: return getattr(wrapped, name) + return getattr(self.__class__, name) + def __setattr__(self, name, value): + if name == "read_complete": + self.__read_complete_state[0] = bool(value) + else: + self.__dict__[name] = value + def seek(self, offset, whence=0): assert whence in [0,1,2] @@ -93,9 +104,14 @@ if to_read is None: assert whence == 2 self.__cache.write(self.wrapped.read()) + self.read_complete = True self.__pos = self.__cache.tell() - offset else: - self.__cache.write(self.wrapped.read(to_read)) + data = self.wrapped.read(to_read) + if not data: + self.read_complete = True + else: + self.__cache.write(data) # Don't raise an exception even if we've seek()ed past the end # of .wrapped, since fseek() doesn't complain in that case. 
# Also like fseek(), pretend we have seek()ed past the end, @@ -112,6 +128,7 @@ def __copy__(self): cpy = self.__class__(self.wrapped) cpy.__cache = self.__cache + cpy.__read_complete_state = self.__read_complete_state return cpy def get_data(self): @@ -137,10 +154,15 @@ self.__cache.seek(0, 2) if size == -1: self.__cache.write(self.wrapped.read()) + self.read_complete = True else: to_read = size - available assert to_read > 0 - self.__cache.write(self.wrapped.read(to_read)) + data = self.wrapped.read(to_read) + if not data: + self.read_complete = True + else: + self.__cache.write(data) self.__cache.seek(pos) data = self.__cache.read(size) @@ -156,7 +178,11 @@ # read another line first pos = self.__pos self.__cache.seek(0, 2) - self.__cache.write(self.wrapped.readline()) + data = self.wrapped.readline() + if not data: + self.read_complete = True + else: + self.__cache.write(data) self.__cache.seek(pos) data = self.__cache.readline() @@ -172,6 +198,7 @@ pos = self.__pos self.__cache.seek(0, 2) self.__cache.write(self.wrapped.read()) + self.read_complete = True self.__cache.seek(pos) data = self.__cache.readlines(sizehint) self.__pos = self.__cache.tell() Modified: wwwsearch/mechanize/trunk/test/test_mechanize.py ============================================================================== --- wwwsearch/mechanize/trunk/test/test_mechanize.py (original) +++ wwwsearch/mechanize/trunk/test/test_mechanize.py Thu Sep 28 03:25:08 2006 @@ -130,6 +130,8 @@ if info is None: info = {} self._info = MockHeaders(info) self.source = "%d%d" % (id(self), random.randint(0, sys.maxint-1)) + # otherwise we can't test for "same_response" in test_history + self.read_complete = True def info(self): return self._info def geturl(self): return self.url def read(self, size=-1): return self.fp.read(size) @@ -465,7 +467,8 @@ self.assertRaises(mechanize.BrowserStateError, b.back, 2) r8 = b.open("/spam") - # even if we get a HTTPError, history and .response() should still get updated + # even if 
we get a HTTPError, history and .response() should still get + # updated error = urllib2.HTTPError("http://example.com/bad", 503, "Oops", MockHeaders(), StringIO.StringIO()) b.add_handler(make_mock_handler()([("https_open", error)])) @@ -476,6 +479,38 @@ b.close() # XXX assert BrowserStateError + def test_reload_read_incomplete(self): + import mechanize + from mechanize._response import test_response + class Browser(TestBrowser): + def __init__(self): + TestBrowser.__init__(self) + self.reloaded = False + def reload(self): + self.reloaded = True + TestBrowser.reload(self) + br = Browser() + data = "%s" + data = data % ("The quick brown fox jumps over the lazy dog."*100) + r = test_response(data, [("content-type", "text/html")]) + br.add_handler(make_mock_handler()([("http_open", r)])) + + # .reload() on .back() if the whole response hasn't already been read + # (.read_incomplete is True) + r = br.open(r.geturl()) + r.read(10) + br.open('http://www.example.com/blah') + self.failIf(br.reloaded) + br.back() + self.assert_(br.reloaded) + + # don't reload if already read + br.reloaded = False + br.response().read() + br.open('http://www.example.com/blah') + br.back() + self.failIf(br.reloaded) + def test_viewing_html(self): # XXX not testing multiple Content-Type headers import mechanize From jjlee at codespeak.net Sat Sep 30 20:43:27 2006 From: jjlee at codespeak.net (jjlee at codespeak.net) Date: Sat, 30 Sep 2006 20:43:27 +0200 (CEST) Subject: [wwwsearch-commits] r32768 - wwwsearch/mechanize/trunk/test Message-ID: <20060930184327.8507010053@code0.codespeak.net> Author: jjlee Date: Sat Sep 30 20:43:24 2006 New Revision: 32768 Added: wwwsearch/mechanize/trunk/test/test_html.py wwwsearch/mechanize/trunk/test/test_opener.py wwwsearch/mechanize/trunk/test/test_useragent.py Modified: wwwsearch/mechanize/trunk/test/test_mechanize.py wwwsearch/mechanize/trunk/test/test_misc.py wwwsearch/mechanize/trunk/test/test_urllib2.py Log: Split up test_mechanize.py and update some 
comments / docstrings Added: wwwsearch/mechanize/trunk/test/test_html.py ============================================================================== --- (empty file) +++ wwwsearch/mechanize/trunk/test/test_html.py Sat Sep 30 20:43:24 2006 @@ -0,0 +1,91 @@ +#!/usr/bin/env python + +from unittest import TestCase + +import mechanize + + +class RegressionTests(TestCase): + + def test_close_base_tag(self): + # any document containing a </base> tag used to cause an exception + br = mechanize.Browser() + response = mechanize.make_response( + "</base>", [("Content-type", "text/html")], "", 200, "OK") + br.set_response(response) + list(br.links()) + + +class CachingGeneratorFunctionTests(TestCase): + + def _get_simple_cgenf(self, log): + from mechanize._html import CachingGeneratorFunction + todo = [] + for ii in range(2): + def work(ii=ii): + log.append(ii) + return ii + todo.append(work) + def genf(): + for a in todo: + yield a() + return CachingGeneratorFunction(genf()) + + def test_cache(self): + log = [] + cgenf = self._get_simple_cgenf(log) + for repeat in range(2): + for ii, jj in zip(cgenf(), range(2)): + self.assertEqual(ii, jj) + self.assertEqual(log, range(2)) # work only done once + + def test_interleaved(self): + log = [] + cgenf = self._get_simple_cgenf(log) + cgen = cgenf() + self.assertEqual(cgen.next(), 0) + self.assertEqual(log, [0]) + cgen2 = cgenf() + self.assertEqual(cgen2.next(), 0) + self.assertEqual(log, [0]) + self.assertEqual(cgen.next(), 1) + self.assertEqual(log, [0, 1]) + self.assertEqual(cgen2.next(), 1) + self.assertEqual(log, [0, 1]) + self.assertRaises(StopIteration, cgen.next) + self.assertRaises(StopIteration, cgen2.next) + + +class UnescapeTests(TestCase): + + def test_unescape_charref(self): + from mechanize._html import unescape_charref + mdash_utf8 = u"\u2014".encode("utf-8") + for ref, codepoint, utf8, latin1 in [ + ("38", 38, u"&".encode("utf-8"), "&"), + ("x2014", 0x2014, mdash_utf8, "&#x2014;"), + ("8212", 8212, mdash_utf8, "&#8212;"), + ]: + 
self.assertEqual(unescape_charref(ref, None), unichr(codepoint)) + self.assertEqual(unescape_charref(ref, 'latin-1'), latin1) + self.assertEqual(unescape_charref(ref, 'utf-8'), utf8) + + def test_unescape(self): + import htmlentitydefs + from mechanize._html import unescape + data = "&amp; &lt; &mdash; &#8212; &#x2014;" + mdash_utf8 = u"\u2014".encode("utf-8") + ue = unescape(data, htmlentitydefs.name2codepoint, "utf-8") + self.assertEqual("& < %s %s %s" % ((mdash_utf8,)*3), ue) + + for text, expect in [ + ("&a&amp;", "&a&"), + ("a&amp;", "a&"), + ]: + got = unescape(text, htmlentitydefs.name2codepoint, "latin-1") + self.assertEqual(got, expect) + + +if __name__ == "__main__": + import unittest + unittest.main() Modified: wwwsearch/mechanize/trunk/test/test_mechanize.py ============================================================================== --- wwwsearch/mechanize/trunk/test/test_mechanize.py (original) +++ wwwsearch/mechanize/trunk/test/test_mechanize.py Sat Sep 30 20:43:24 2006 @@ -1,8 +1,9 @@ #!/usr/bin/env python +"""Tests for mechanize.Browser.""" -import sys, os, random, math +import sys, os, random from unittest import TestCase -import StringIO, re, UserDict, urllib2 +import StringIO, re, urllib2 import mechanize FACTORY_CLASSES = [mechanize.DefaultFactory] @@ -15,100 +16,8 @@ FACTORY_CLASSES.append(mechanize.RobustFactory) -def killfile(filename): - try: - os.remove(filename) - except OSError: - if os.name=='nt': - try: - os.chmod(filename, stat.S_IWRITE) - os.remove(filename) - except OSError: - pass - - -class RegressionTests(TestCase): - - def test_close_base_tag(self): - # any document containing a </base> tag used to cause an exception - br = mechanize.Browser() - response = mechanize.make_response( - "</base>", [("Content-type", "text/html")], "", 200, "OK") - br.set_response(response) - list(br.links()) - - -class CachingGeneratorFunctionTests(TestCase): - - def _get_simple_cgenf(self, log): - from mechanize._html import CachingGeneratorFunction - todo = [] - for ii in range(2): - def 
work(ii=ii): - log.append(ii) - return ii - todo.append(work) - def genf(): - for a in todo: - yield a() - return CachingGeneratorFunction(genf()) - - def test_cache(self): - log = [] - cgenf = self._get_simple_cgenf(log) - for repeat in range(2): - for ii, jj in zip(cgenf(), range(2)): - self.assertEqual(ii, jj) - self.assertEqual(log, range(2)) # work only done once - - def test_interleaved(self): - log = [] - cgenf = self._get_simple_cgenf(log) - cgen = cgenf() - self.assertEqual(cgen.next(), 0) - self.assertEqual(log, [0]) - cgen2 = cgenf() - self.assertEqual(cgen2.next(), 0) - self.assertEqual(log, [0]) - self.assertEqual(cgen.next(), 1) - self.assertEqual(log, [0, 1]) - self.assertEqual(cgen2.next(), 1) - self.assertEqual(log, [0, 1]) - self.assertRaises(StopIteration, cgen.next) - self.assertRaises(StopIteration, cgen2.next) - - -class UnescapeTests(TestCase): - - def test_unescape_charref(self): - from mechanize._html import unescape_charref - mdash_utf8 = u"\u2014".encode("utf-8") - for ref, codepoint, utf8, latin1 in [ - ("38", 38, u"&".encode("utf-8"), "&"), -