From jjlee at codespeak.net Sat Jul 7 18:28:58 2007 From: jjlee at codespeak.net (jjlee at codespeak.net) Date: Sat, 7 Jul 2007 18:28:58 +0200 (CEST) Subject: [wwwsearch-commits] r44825 - wwwsearch/mechanize/trunk/test-tools Message-ID: <20070707162858.7626481C1@code0.codespeak.net> Author: jjlee Date: Sat Jul 7 18:28:56 2007 New Revision: 44825 Modified: wwwsearch/mechanize/trunk/test-tools/testprogram.py Log: * Use SO_REUSEADDR for test server. * Raise exception if local server fails to start. Modified: wwwsearch/mechanize/trunk/test-tools/testprogram.py ============================================================================== --- wwwsearch/mechanize/trunk/test-tools/testprogram.py (original) +++ wwwsearch/mechanize/trunk/test-tools/testprogram.py Sat Jul 7 18:28:56 2007 @@ -42,6 +42,7 @@ import socket def connect(): sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) sock.settimeout(1.0) try: sock.connect(('127.0.0.1', self.port)) @@ -70,6 +71,8 @@ timeout = min(timeout, hard_limit) else: break + else: + raise def kill_windows(handle, report_hook): try: From jjlee at codespeak.net Sat Jul 7 18:39:31 2007 From: jjlee at codespeak.net (jjlee at codespeak.net) Date: Sat, 7 Jul 2007 18:39:31 +0200 (CEST) Subject: [wwwsearch-commits] r44827 - in wwwsearch/mechanize/trunk: mechanize test Message-ID: <20070707163931.8E4F981C6@code0.codespeak.net> Author: jjlee Date: Sat Jul 7 18:39:31 2007 New Revision: 44827 Modified: wwwsearch/mechanize/trunk/mechanize/_http.py wwwsearch/mechanize/trunk/test/test_urllib2.py Log: * Log skipped Refreshes * Add some more Refresh tests Modified: wwwsearch/mechanize/trunk/mechanize/_http.py ============================================================================== --- wwwsearch/mechanize/trunk/mechanize/_http.py (original) +++ wwwsearch/mechanize/trunk/mechanize/_http.py Sat Jul 7 18:39:31 2007 @@ -540,6 +540,7 @@ def __init__(self, max_time=0, honor_time=True): 
self.max_time = max_time self.honor_time = honor_time + self._sleep = time.sleep def http_response(self, request, response): code, msg, hdrs = response.code, response.msg, response.info() @@ -551,16 +552,19 @@ except ValueError: debug("bad Refresh header: %r" % refresh) return response + if newurl is None: newurl = response.geturl() if (self.max_time is None) or (pause <= self.max_time): if pause > 1E-3 and self.honor_time: - time.sleep(pause) + self._sleep(pause) hdrs["location"] = newurl # hardcoded http is NOT a bug response = self.parent.error( "http", request, response, "refresh", msg, hdrs) + else: + debug("Refresh header ignored: %r" % refresh) return response Modified: wwwsearch/mechanize/trunk/test/test_urllib2.py ============================================================================== --- wwwsearch/mechanize/trunk/test/test_urllib2.py (original) +++ wwwsearch/mechanize/trunk/test/test_urllib2.py Sat Jul 7 18:39:31 2007 @@ -919,6 +919,48 @@ self.assertEqual(o.proto, "http") self.assertEqual(o.args, (req, r, "refresh", "OK", headers)) + def test_refresh_honor_time(self): + class SleepTester: + def __init__(self, test, seconds): + self._test = test + if seconds is 0: + seconds = None # don't expect a sleep for 0 seconds + self._expected = seconds + self._got = None + def sleep(self, seconds): + self._got = seconds + def verify(self): + self._test.assertEqual(self._expected, self._got) + class Opener: + called = False + def error(self, *args, **kwds): + self.called = True + def test(rp, header, refresh_after): + expect_refresh = refresh_after is not None + opener = Opener() + rp.parent = opener + st = SleepTester(self, refresh_after) + rp._sleep = st.sleep + rp.http_response(Request("http://example.com"), + test_response(headers=[("Refresh", header)]), + ) + self.assertEqual(expect_refresh, opener.called) + st.verify() + + # by default, only zero-time refreshes are honoured + test(HTTPRefreshProcessor(), "0", 0) + test(HTTPRefreshProcessor(), "2", None) 
+ + # if requested, more than zero seconds are allowed + test(HTTPRefreshProcessor(max_time=None), "2", 2) + test(HTTPRefreshProcessor(max_time=30), "2", 2) + + # no sleep if we don't "honor_time" + test(HTTPRefreshProcessor(max_time=30, honor_time=False), "2", 0) + + # request for too-long wait before refreshing --> no refresh occurs + test(HTTPRefreshProcessor(max_time=30), "60", None) + def test_redirect(self): from_url = "http://example.com/a.html" to_url = "http://example.com/b.html" From jjlee at codespeak.net Sat Jul 7 18:47:34 2007 From: jjlee at codespeak.net (jjlee at codespeak.net) Date: Sat, 7 Jul 2007 18:47:34 +0200 (CEST) Subject: [wwwsearch-commits] r44828 - wwwsearch/mechanize/trunk/mechanize Message-ID: <20070707164734.BA82781C7@code0.codespeak.net> Author: jjlee Date: Sat Jul 7 18:47:34 2007 New Revision: 44828 Modified: wwwsearch/mechanize/trunk/mechanize/_useragent.py Log: Change default mechanize.UserAgent (hence mechanize.Browser) Refresh behaviour: * Don't follow Refreshes > 30 seconds * honor_time is now False by default This is a backwards-incompatible change. The old default behaviour was confusing and rarely useful. 
You can get behaviour the same as the old defaults like so: browser.set_handle_refresh(True, max_time=None, honor_time=True) Modified: wwwsearch/mechanize/trunk/mechanize/_useragent.py ============================================================================== --- wwwsearch/mechanize/trunk/mechanize/_useragent.py (original) +++ wwwsearch/mechanize/trunk/mechanize/_useragent.py Sat Jul 7 18:47:34 2007 @@ -229,7 +229,7 @@ def set_handle_redirect(self, handle): """Set whether to handle HTTP 30x redirections.""" self._set_handler("_redirect", handle) - def set_handle_refresh(self, handle, max_time=None, honor_time=True): + def set_handle_refresh(self, handle, max_time=30.0, honor_time=False): """Set whether to handle HTTP Refresh headers.""" self._set_handler("_refresh", handle, constructor_kwds= {"max_time": max_time, "honor_time": honor_time}) From jjlee at codespeak.net Sat Jul 7 19:09:27 2007 From: jjlee at codespeak.net (jjlee at codespeak.net) Date: Sat, 7 Jul 2007 19:09:27 +0200 (CEST) Subject: [wwwsearch-commits] r44829 - in wwwsearch/mechanize/trunk: . 
test-tools Message-ID: <20070707170927.C645A81CA@code0.codespeak.net> Author: jjlee Date: Sat Jul 7 19:09:27 2007 New Revision: 44829 Modified: wwwsearch/mechanize/trunk/functional_tests.py wwwsearch/mechanize/trunk/test-tools/cookietest.cgi wwwsearch/mechanize/trunk/test-tools/testprogram.py wwwsearch/mechanize/trunk/test-tools/twisted-localserver.py Log: * Add a functional test for Refresh * Update docstrings / comments re local functional testing server Modified: wwwsearch/mechanize/trunk/functional_tests.py ============================================================================== --- wwwsearch/mechanize/trunk/functional_tests.py (original) +++ wwwsearch/mechanize/trunk/functional_tests.py Sat Jul 7 19:09:27 2007 @@ -4,7 +4,7 @@ # thanks Moof (aka Giles Antonio Radford) for some of these -import os, sys +import os, sys, urllib from unittest import TestCase import mechanize @@ -15,13 +15,6 @@ HTTPRedirectDebugProcessor, HTTPResponseDebugProcessor from mechanize._rfc3986 import urljoin -# XXX -# document twisted.web2 install (I forgot how I did it -- reinstall!) -# implement remaining stuff used by functional_tests.py -# in twisted-localserver.py: -# - 302 followed by 404 response -# - helper cgi script for cookies &c. 
- #from cookielib import CookieJar #from urllib2 import build_opener, install_opener, urlopen #from urllib2 import HTTPCookieProcessor, HTTPHandler @@ -90,6 +83,26 @@ self.assertEqual(r.code, 200) self.assert_("GeneralFAQ.html" in r.read(2048)) + def test_refresh(self): + def refresh_request(seconds): + uri = urljoin(self.uri, "/cgi-bin/cookietest.cgi") + val = urllib.quote_plus('%d; url="%s"' % (seconds, self.uri)) + return uri + ("?refresh=%s" % val) + r = self.browser.open(refresh_request(5)) + self.assertEqual(r.geturl(), self.uri) + # Refresh with pause > 30 seconds is ignored by default (these long + # refreshes tend to be there only because the website owner wants you + # to see the latest news, or whatever -- they're not essential to the + # operation of the site, and not really useful or appropriate when + # scraping). + refresh_uri = refresh_request(60) + r = self.browser.open(refresh_uri) + self.assertEqual(r.geturl(), refresh_uri) + # allow long refreshes (note we don't actually wait 60 seconds by default) + self.browser.set_handle_refresh(True, max_time=None) + r = self.browser.open(refresh_request(60)) + self.assertEqual(r.geturl(), self.uri) + def test_file_url(self): url = "file://%s" % sanepathname2url( os.path.abspath('functional_tests.py')) @@ -409,9 +422,8 @@ - start a local Twisted HTTP server and run the functional tests against that, rather than against SourceForge (quicker!) - Note not all the functional tests use the local server yet - -- some currently always access the internet regardless of - this option and the --uri option. + If this option doesn't work on Windows/Mac, somebody please + tell me about it, or I'll never find out... 
""" prog = testprogram.TestProgram( ["functional_tests"], Modified: wwwsearch/mechanize/trunk/test-tools/cookietest.cgi ============================================================================== --- wwwsearch/mechanize/trunk/test-tools/cookietest.cgi (original) +++ wwwsearch/mechanize/trunk/test-tools/cookietest.cgi Sat Jul 7 19:09:27 2007 @@ -3,9 +3,12 @@ # This is used by functional_tests.py +#import cgitb; cgitb.enable() + print "Content-Type: text/html" print "Set-Cookie: foo=bar\n" -import sys, os, string, cgi, Cookie +import sys, os, string, cgi, Cookie, urllib +from xml.sax import saxutils from types import ListType @@ -13,8 +16,18 @@ cookie = Cookie.SimpleCookie() cookieHdr = os.environ.get("HTTP_COOKIE", "") cookie.load(cookieHdr) -if not cookie.has_key("foo"): +form = cgi.FieldStorage() +refresh_value = None +if form.has_key("refresh"): + refresh = form["refresh"] + if not isinstance(refresh, ListType): + refresh_value = refresh.value +if refresh_value is not None: + print '' % ( + saxutils.quoteattr(urllib.unquote_plus(refresh_value))) +elif not cookie.has_key("foo"): print '' + print "" print "
Received cookies:
" print "" @@ -26,7 +39,6 @@ print "" print cgi.escape(os.environ.get("HTTP_REFERER", "")) print "" -form = cgi.FieldStorage() print "Received parameters:
" print "" for k in form.keys(): Modified: wwwsearch/mechanize/trunk/test-tools/testprogram.py ============================================================================== --- wwwsearch/mechanize/trunk/test-tools/testprogram.py (original) +++ wwwsearch/mechanize/trunk/test-tools/testprogram.py Sat Jul 7 19:09:27 2007 @@ -210,7 +210,7 @@ """ def __init__(self, moduleNames, localServerProcess, defaultTest=None, argv=None, testRunner=None, testLoader=defaultTestLoader, - defaultUri="http://wwwsearch.sf.net/", + defaultUri="http://wwwsearch.sourceforge.net/", usageExamples=USAGE_EXAMPLES, ): self.modules = [] Modified: wwwsearch/mechanize/trunk/test-tools/twisted-localserver.py ============================================================================== --- wwwsearch/mechanize/trunk/test-tools/twisted-localserver.py (original) +++ wwwsearch/mechanize/trunk/test-tools/twisted-localserver.py Sat Jul 7 19:09:27 2007 @@ -9,9 +9,9 @@ python test-tools/twisted-localserver.py 8042 python functional_tests.py --uri=http://localhost:8042/ -You need Twisted XXX version to run it: +You need twisted.web2 to run it. 
On ubuntu feisty, you can install it like so: -XXX installation instructions +sudo apt-get install python-twisted-web2 """ import sys, re From jjlee at codespeak.net Sat Jul 7 20:11:16 2007 From: jjlee at codespeak.net (jjlee at codespeak.net) Date: Sat, 7 Jul 2007 20:11:16 +0200 (CEST) Subject: [wwwsearch-commits] r44830 - wwwsearch/release_scripts Message-ID: <20070707181116.C880681CD@code0.codespeak.net> Author: jjlee Date: Sat Jul 7 20:11:15 2007 New Revision: 44830 Modified: wwwsearch/release_scripts/mrelease.py wwwsearch/release_scripts/release.py Log: * Use subprocess.call() instead of os.system() * Exit if an external command fails * Add --no-version-check and --no-date-check arguments for testing source release build process Modified: wwwsearch/release_scripts/mrelease.py ============================================================================== --- wwwsearch/release_scripts/mrelease.py (original) +++ wwwsearch/release_scripts/mrelease.py Sat Jul 7 20:11:15 2007 @@ -67,7 +67,9 @@ if tag: sdist.tag(src, clean=options.clean) if rel: - sdist.build(update=options.update, clean=options.clean) + sdist.build(update=options.update, clean=options.clean, + check_versions=options.check_versions, + check_dates=options.check_dates) if upload: sdist.upload_to_sourceforge() Modified: wwwsearch/release_scripts/release.py ============================================================================== --- wwwsearch/release_scripts/release.py (original) +++ wwwsearch/release_scripts/release.py Sat Jul 7 20:11:15 2007 @@ -1,6 +1,6 @@ #!/usr/bin/env python -import sys, os, re, tempfile, time, stat, posixpath, shutil +import sys, os, re, tempfile, time, stat, posixpath, shutil, subprocess import logging logger = logging.getLogger("wwwsearch.release") @@ -193,13 +193,22 @@ finally: chdir(cwd, self.pretend) - def build(self, update=True, clean=False): + def build(self, update=True, clean=False, + check_versions=True, + check_dates=True, + ): self.prepare(update, clean) if 
self.pretend: print "(would check versions here)" else: - bad_versions = self.check_versions() - bad_dates = self.check_dates() + if check_versions: + bad_versions = self.check_versions() + else: + bad_versions = [] + if check_dates: + bad_dates = self.check_dates() + else: + bad_dates = [] if bad_versions: raise BuildError( "version doesn't match in %s" % " ".join(bad_versions)) @@ -280,10 +289,17 @@ def build_files(self): return self.build(self.files) -def system(cmd, pretend=False): +def system(cmd, pretend=False, stdout=None): print cmd if not pretend: - os.system(cmd) + args = cmd.split() + assert ">" not in args, "shell redirect in command: "+cmd + try: + r = subprocess.call(args, stdout=stdout) + except OSError, exc: + raise RuntimeError("%s while executing: %s" % (exc, args)) + if r != 0: + raise RuntimeError("%d exit status from: %s" % (r, args)) def rename(src, dest, pretend=False): print "renaming %s --> %s" % (src, dest) @@ -337,7 +353,8 @@ if defines: def_text = " %s " % (" ".join(["-D%s" % define for define in defines])) def cmd(fn): - return system("em.py %s%s > %s" % (filename, def_text, fn), pretend) + system("em.py %s%s" % (filename, def_text), + pretend, stdout=open(fn, "w")) out_fn = wrap_command( cmd, os.path.dirname(filename), @@ -347,8 +364,11 @@ return out_fn def lynx_dump(filename, pretend=False): + def cmd(fn): + return system("lynx -dump %s" % filename, + pretend, stdout=open(fn, "w")) out_fn = wrap_command( - lambda fn: system("lynx -dump %s > %s" % (filename, fn), pretend), + cmd, os.path.dirname(filename), os.path.splitext(os.path.basename(filename))[0]+".txt", pretend=pretend, @@ -384,6 +404,14 @@ action="store_false", dest="update", default=True, help="Leave svn working copy unchanged " " (do not update or checkout)") + parser.add_option("--no-version-check", + action="store_false", dest="check_versions", default=True, + help="Don't check the version strings that appear in " + "various files for correctness.") + 
parser.add_option("--no-date-check", + action="store_false", dest="check_dates", default=True, + help="Don't check the date strings that appear in " + "various files for correctness.") options, args = parser.parse_args() tag = False From jjlee at codespeak.net Sun Jul 15 00:51:32 2007 From: jjlee at codespeak.net (jjlee at codespeak.net) Date: Sun, 15 Jul 2007 00:51:32 +0200 (CEST) Subject: [wwwsearch-commits] r45095 - wwwsearch/mechanize/trunk/test Message-ID: <20070714225132.1B77F8179@code0.codespeak.net> Author: jjlee Date: Sun Jul 15 00:51:31 2007 New Revision: 45095 Modified: wwwsearch/mechanize/trunk/test/test_browser.py Log: Make test_browser.BrowserTests.test_empty() run with all factory classes Modified: wwwsearch/mechanize/trunk/test/test_browser.py ============================================================================== --- wwwsearch/mechanize/trunk/test/test_browser.py (original) +++ wwwsearch/mechanize/trunk/test/test_browser.py Sun Jul 15 00:51:31 2007 @@ -334,10 +334,14 @@ self.assertEqual(b.viewing_html(), expect) def test_empty(self): + for factory_class in FACTORY_CLASSES: + self._test_empty(factory_class()) + + def _test_empty(self, factory): import mechanize url = "http://example.com/" - b = TestBrowser() + b = TestBrowser(factory=factory) self.assert_(b.response() is None) From jjlee at codespeak.net Sun Jul 15 00:56:01 2007 From: jjlee at codespeak.net (jjlee at codespeak.net) Date: Sun, 15 Jul 2007 00:56:01 +0200 (CEST) Subject: [wwwsearch-commits] r45096 - in wwwsearch/mechanize/trunk: mechanize test Message-ID: <20070714225601.D1B018179@code0.codespeak.net> Author: jjlee Date: Sun Jul 15 00:56:01 2007 New Revision: 45096 Modified: wwwsearch/mechanize/trunk/mechanize/_html.py wwwsearch/mechanize/trunk/mechanize/_pullparser.py wwwsearch/mechanize/trunk/test/test_html.doctest Log: Make title parsing follow Firefox behaviour wrt child elements (previously the behaviour differed between Factory and RobustFactory). 
Modified: wwwsearch/mechanize/trunk/mechanize/_html.py ============================================================================== --- wwwsearch/mechanize/trunk/mechanize/_html.py (original) +++ wwwsearch/mechanize/trunk/mechanize/_html.py Sun Jul 15 00:56:01 2007 @@ -17,6 +17,8 @@ DEFAULT_ENCODING = "latin-1" +COMPRESS_RE = re.compile(r"\s+") + # the base classe is purely for backwards compatibility class ParseError(ClientForm.ParseError): pass @@ -235,6 +237,30 @@ self._response = response self._encoding = encoding + def _get_title_text(self, parser): + text = [] + tok = None + while 1: + try: + tok = parser.get_token() + except NoMoreTokensError: + break + if tok.type == "data": + text.append(str(tok)) + elif tok.type == "entityref": + t = unescape("&%s;" % tok.data, + parser._entitydefs, parser.encoding) + text.append(t) + elif tok.type == "charref": + t = unescape_charref(tok.data, parser.encoding) + text.append(t) + elif tok.type in ["starttag", "endtag", "startendtag"]: + tag_name = tok.data + if tok.type == "endtag" and tag_name == "title": + break + text.append(str(tok)) + return COMPRESS_RE.sub(" ", "".join(text).strip()) + def title(self): import _pullparser p = _pullparser.TolerantPullParser( @@ -245,7 +271,7 @@ except _pullparser.NoMoreTokensError: return None else: - return p.get_text() + return self._get_title_text(p) except sgmllib.SGMLParseError, exc: raise ParseError(exc) @@ -328,7 +354,7 @@ class RobustLinksFactory: - compress_re = re.compile(r"\s+") + compress_re = COMPRESS_RE def __init__(self, link_parser_class=None, @@ -418,7 +444,8 @@ if title == _beautifulsoup.Null: return None else: - return title.firstText(lambda t: True) + inner_html = "".join([str(node) for node in title.contents]) + return COMPRESS_RE.sub(" ", inner_html.strip()) class Factory: Modified: wwwsearch/mechanize/trunk/mechanize/_pullparser.py ============================================================================== --- 
wwwsearch/mechanize/trunk/mechanize/_pullparser.py (original) +++ wwwsearch/mechanize/trunk/mechanize/_pullparser.py Sun Jul 15 00:56:01 2007 @@ -35,6 +35,7 @@ import re, htmlentitydefs import sgmllib, HTMLParser +from xml.sax import saxutils from _html import unescape, unescape_charref @@ -85,6 +86,60 @@ args = ", ".join(map(repr, [self.type, self.data, self.attrs])) return self.__class__.__name__+"(%s)" % args + def __str__(self): + """ + >>> print Token("starttag", "br") +
<br> + >>> print Token("starttag", "a", + ... [("href", "http://www.python.org/"), ("alt", '"foo"')]) + <a href="http://www.python.org/" alt='"foo"'> + >>> print Token("startendtag", "br") +
<br /> + >>> print Token("startendtag", "br", [("spam", "eggs")]) +
<br spam="eggs" /> + >>> print Token("endtag", "p") + </p> + >>> print Token("charref", "38") + &#38; + >>> print Token("entityref", "amp") + &amp; + >>> print Token("data", "foo\\nbar") + foo + bar + >>> print Token("comment", "Life is a bowl\\nof cherries.") + <!--Life is a bowl + of cherries.--> + >>> print Token("decl", "decl") + <!decl> + >>> print Token("pi", "pi") + <?pi> + """ + if self.attrs is not None: + attrs = "".join([" %s=%s" % (k, saxutils.quoteattr(v)) for + k, v in self.attrs]) + else: + attrs = "" + if self.type == "starttag": + return "<%s%s>" % (self.data, attrs) + elif self.type == "startendtag": + return "<%s%s />" % (self.data, attrs) + elif self.type == "endtag": + return "</%s>" % self.data + elif self.type == "charref": + return "&#%s;" % self.data + elif self.type == "entityref": + return "&%s;" % self.data + elif self.type == "data": + return self.data + elif self.type == "comment": + return "<!--%s-->" % self.data + elif self.type == "decl": + return "<!%s>" % self.data + elif self.type == "pi": + return "<?%s>" % self.data + assert False + + def iter_until_exception(fn, exception, *args, **kwds): while 1: try: Modified: wwwsearch/mechanize/trunk/test/test_html.doctest ============================================================================== --- wwwsearch/mechanize/trunk/test/test_html.doctest (original) +++ wwwsearch/mechanize/trunk/test/test_html.doctest Sun Jul 15 00:56:01 2007 @@ -213,3 +213,41 @@ None >>> print get_first_link_text_sgmllib(html) None + + +Title parsing. We follow Firefox's behaviour with regard to child +elements (haven't tested IE). + +>>> def get_title_bs(html): +... factory = RobustTitleFactory() +... soup = MechanizeBs("utf-8", html) +... factory.set_soup(soup, "utf-8") +... return factory.title() + +>>> def get_title_sgmllib(html): +... factory = TitleFactory() +... response = test_html_response(html) +... factory.set_response(response, "utf-8") +... return factory.title() + +>>> html = ("""\ +... +...Title +...Blah.
+... """) +>>> get_title_bs(html) +'Title' +>>> get_title_sgmllib(html) +'Title' + +>>> html = ("""\ +...
+...Ti +... tle && +... +...Blah.
+... """) +>>> get_title_bs(html) +'Ti tle &&' +>>> get_title_sgmllib(html) +'Ti tle &&' From jjlee at codespeak.net Fri Jul 27 20:26:06 2007 From: jjlee at codespeak.net (jjlee at codespeak.net) Date: Fri, 27 Jul 2007 20:26:06 +0200 (CEST) Subject: [wwwsearch-commits] r45412 - wwwsearch/mechanize/trunk/mechanize Message-ID: <20070727182606.87D9B8341@code0.codespeak.net> Author: jjlee Date: Fri Jul 27 20:26:06 2007 New Revision: 45412 Modified: wwwsearch/mechanize/trunk/mechanize/_mechanize.py Log: Remove some out-of-date docstring text. Modified: wwwsearch/mechanize/trunk/mechanize/_mechanize.py ============================================================================== --- wwwsearch/mechanize/trunk/mechanize/_mechanize.py (original) +++ wwwsearch/mechanize/trunk/mechanize/_mechanize.py Fri Jul 27 20:26:06 2007 @@ -163,13 +163,7 @@ self.follow_link = self.find_link = None def set_handle_referer(self, handle): - """Set whether to add Referer header to each request. - - This base class does not implement this feature (so don't turn this on - if you're using this base class directly), but the subclass - mechanize.Browser does. - - """ + """Set whether to add Referer header to each request.""" self._set_handler("_referer", handle) self._handle_referer = bool(handle) From jjlee at codespeak.net Fri Jul 27 20:29:01 2007 From: jjlee at codespeak.net (jjlee at codespeak.net) Date: Fri, 27 Jul 2007 20:29:01 +0200 (CEST) Subject: [wwwsearch-commits] r45413 - wwwsearch/mechanize/trunk/mechanize Message-ID: <20070727182901.3C8D082E2@code0.codespeak.net> Author: jjlee Date: Fri Jul 27 20:29:00 2007 New Revision: 45413 Modified: wwwsearch/mechanize/trunk/mechanize/_mechanize.py Log: Docstring grammar fix. 
Modified: wwwsearch/mechanize/trunk/mechanize/_mechanize.py ============================================================================== --- wwwsearch/mechanize/trunk/mechanize/_mechanize.py (original) +++ wwwsearch/mechanize/trunk/mechanize/_mechanize.py Fri Jul 27 20:29:00 2007 @@ -190,8 +190,8 @@ def open_novisit(self, url, data=None): """Open a URL without visiting it. - The browser state (including .request, .response(), history, forms and - links) are all left unchanged by calling this function. + Browser state (including request, response, history, forms and links) + is left unchanged by calling this function. The interface is the same as for .open(). From jjlee at codespeak.net Sat Jul 28 20:28:56 2007 From: jjlee at codespeak.net (jjlee at codespeak.net) Date: Sat, 28 Jul 2007 20:28:56 +0200 (CEST) Subject: [wwwsearch-commits] r45426 - wwwsearch/mechanize/trunk/mechanize Message-ID: <20070728182856.732288065@code0.codespeak.net> Author: jjlee Date: Sat Jul 28 20:28:55 2007 New Revision: 45426 Modified: wwwsearch/mechanize/trunk/mechanize/_mechanize.py Log: Update .title() docstring to reflect recent change to behaviour (treatment of tags). Modified: wwwsearch/mechanize/trunk/mechanize/_mechanize.py ============================================================================== --- wwwsearch/mechanize/trunk/mechanize/_mechanize.py (original) +++ wwwsearch/mechanize/trunk/mechanize/_mechanize.py Sat Jul 28 20:28:55 2007 @@ -451,10 +451,10 @@ return self._factory.encoding def title(self): - """Return title, or None if there is no title element in the document. + r"""Return title, or None if there is no title element in the document. - Tags are stripped or textified as described in docs for - PullParser.get_text() method of pullparser module. + Treatment of any tag children of <title> attempts to follow Firefox and IE + (currently, tags are preserved). 
""" if not self.viewing_html(): From jjlee at codespeak.net Sun Sep 2 22:01:44 2007 From: jjlee at codespeak.net (jjlee at codespeak.net) Date: Sun, 2 Sep 2007 22:01:44 +0200 (CEST) Subject: [wwwsearch-commits] r46246 - wwwsearch/mechanize/trunk/mechanize Message-ID: <20070902200144.7C0DF81E1@code0.codespeak.net> Author: jjlee Date: Sun Sep 2 22:01:42 2007 New Revision: 46246 Modified: wwwsearch/mechanize/trunk/mechanize/_clientcookie.py wwwsearch/mechanize/trunk/mechanize/_http.py wwwsearch/mechanize/trunk/mechanize/_mechanize.py wwwsearch/mechanize/trunk/mechanize/_msiecookiejar.py wwwsearch/mechanize/trunk/mechanize/_pullparser.py wwwsearch/mechanize/trunk/mechanize/_useragent.py wwwsearch/mechanize/trunk/mechanize/_util.py Log: Fix some too-long lines. Modified: wwwsearch/mechanize/trunk/mechanize/_clientcookie.py ============================================================================== --- wwwsearch/mechanize/trunk/mechanize/_clientcookie.py (original) +++ wwwsearch/mechanize/trunk/mechanize/_clientcookie.py Sun Sep 2 22:01:42 2007 @@ -465,8 +465,8 @@ Note that domain_return_ok is called for every *cookie* domain, not just for the *request* domain. For example, the function might be - called with both ".acme.com" and "www.acme.com" if the request domain is - "www.acme.com". The same goes for path_return_ok. + called with both ".acme.com" and "www.acme.com" if the request domain + is "www.acme.com". The same goes for path_return_ok. For argument documentation, see the docstring for return_ok. @@ -818,7 +818,8 @@ # done by domain_return_ok. 
debug(" - checking cookie %s", cookie) - for n in "version", "verifiability", "secure", "expires", "port", "domain": + for n in ("version", "verifiability", "secure", "expires", "port", + "domain"): fn_name = "return_ok_"+n fn = getattr(self, fn_name) if not fn(cookie, request): Modified: wwwsearch/mechanize/trunk/mechanize/_http.py ============================================================================== --- wwwsearch/mechanize/trunk/mechanize/_http.py (original) +++ wwwsearch/mechanize/trunk/mechanize/_http.py Sun Sep 2 22:01:42 2007 @@ -300,7 +300,8 @@ if is_html(ct_hdrs, url, self._allow_xhtml): try: try: - html_headers = parse_head(response, self.head_parser_class()) + html_headers = parse_head(response, + self.head_parser_class()) finally: response.seek(0) except (HTMLParser.HTMLParseError, Modified: wwwsearch/mechanize/trunk/mechanize/_mechanize.py ============================================================================== --- wwwsearch/mechanize/trunk/mechanize/_mechanize.py (original) +++ wwwsearch/mechanize/trunk/mechanize/_mechanize.py Sun Sep 2 22:01:42 2007 @@ -425,8 +425,8 @@ """Return the global form object, or None if the factory implementation did not supply one. - The "global" form object contains all controls that are not descendants of - any FORM element. + The "global" form object contains all controls that are not descendants + of any FORM element. The returned form object implements the ClientForm.HTMLForm interface. 
Modified: wwwsearch/mechanize/trunk/mechanize/_msiecookiejar.py ============================================================================== --- wwwsearch/mechanize/trunk/mechanize/_msiecookiejar.py (original) +++ wwwsearch/mechanize/trunk/mechanize/_msiecookiejar.py Sun Sep 2 22:01:42 2007 @@ -114,8 +114,9 @@ domain = m.group(1) path = m.group(2) - cookies.append({"KEY": key, "VALUE": value, "DOMAIN": domain, - "PATH": path, "FLAGS": flags, "HIXP": hi_expire, + cookies.append({"KEY": key, "VALUE": value, + "DOMAIN": domain, "PATH": path, + "FLAGS": flags, "HIXP": hi_expire, "LOXP": lo_expire, "HICREATE": hi_create, "LOCREATE": lo_create}) finally: Modified: wwwsearch/mechanize/trunk/mechanize/_pullparser.py ============================================================================== --- wwwsearch/mechanize/trunk/mechanize/_pullparser.py (original) +++ wwwsearch/mechanize/trunk/mechanize/_pullparser.py Sun Sep 2 22:01:42 2007 @@ -205,7 +205,8 @@ return iter_until_exception(self.get_tag, NoMoreTokensError, *names) def tokens(self, *tokentypes): - return iter_until_exception(self.get_token, NoMoreTokensError, *tokentypes) + return iter_until_exception(self.get_token, NoMoreTokensError, + *tokentypes) def next(self): try: Modified: wwwsearch/mechanize/trunk/mechanize/_useragent.py ============================================================================== --- wwwsearch/mechanize/trunk/mechanize/_useragent.py (original) +++ wwwsearch/mechanize/trunk/mechanize/_useragent.py Sun Sep 2 22:01:42 2007 @@ -306,7 +306,8 @@ if obj is not None: newhandler = handler_class(obj) else: - newhandler = handler_class(*constructor_args, **constructor_kwds) + newhandler = handler_class( + *constructor_args, **constructor_kwds) else: newhandler = None self._replace_handler(name, newhandler) Modified: wwwsearch/mechanize/trunk/mechanize/_util.py ============================================================================== --- wwwsearch/mechanize/trunk/mechanize/_util.py (original) 
+++ wwwsearch/mechanize/trunk/mechanize/_util.py Sun Sep 2 22:01:42 2007 @@ -156,7 +156,8 @@ return t -strict_re = re.compile(r"^[SMTWF][a-z][a-z], (\d\d) ([JFMASOND][a-z][a-z]) (\d\d\d\d) (\d\d):(\d\d):(\d\d) GMT$") +strict_re = re.compile(r"^[SMTWF][a-z][a-z], (\d\d) ([JFMASOND][a-z][a-z]) " + r"(\d\d\d\d) (\d\d):(\d\d):(\d\d) GMT$") wkday_re = re.compile( r"^(?:Sun|Mon|Tue|Wed|Thu|Fri|Sat)[a-z]*,?\s*", re.I) loose_http_re = re.compile( From wwwsearch-commits at codespeak.net Thu Sep 20 22:28:13 2007 From: wwwsearch-commits at codespeak.net (Viagra.com Inc) Date: Thu, 20 Sep 2007 22:28:13 +0200 (CEST) Subject: [wwwsearch-commits] Lovers package at discount price! Message-ID: <20070920112945.3458.qmail@host30-221-dynamic.11-79-r.retail.telecomitalia.it> An HTML attachment was scrubbed... URL: http://codespeak.net/pipermail/wwwsearch-commits/attachments/20070920/dc49e7f1/attachment.htm From wwwsearch-commits at codespeak.net Wed Sep 26 18:44:14 2007 From: wwwsearch-commits at codespeak.net (Viagra.com Inc) Date: Wed, 26 Sep 2007 18:44:14 +0200 (CEST) Subject: [wwwsearch-commits] September 70% OFF Message-ID: <20070926074620.13974.qmail@AOrleans-258-1-116-100.w90-21.abo.wanadoo.fr> An HTML attachment was scrubbed... URL: http://codespeak.net/pipermail/wwwsearch-commits/attachments/20070926/96df9694/attachment.htm