[wwwsearch-commits] r19181 - in wwwsearch/mechanize/trunk: . mechanize
jjlee at codespeak.net
jjlee at codespeak.net
Sun Oct 30 16:54:19 CET 2005
Author: jjlee
Date: Sun Oct 30 16:54:18 2005
New Revision: 19181
Modified:
wwwsearch/mechanize/trunk/mechanize/_mechanize.py
wwwsearch/mechanize/trunk/test.py
Log:
Fix .viewing_html(); Clarify some comments and an exception message
Modified: wwwsearch/mechanize/trunk/mechanize/_mechanize.py
==============================================================================
--- wwwsearch/mechanize/trunk/mechanize/_mechanize.py (original)
+++ wwwsearch/mechanize/trunk/mechanize/_mechanize.py Sun Oct 30 16:54:18 2005
@@ -286,6 +286,8 @@
tag = token.data
name = attrs.get("name")
text = None
+ # XXX need to sort out quoting
+ #url = urllib.quote_plus(attrs.get(self.urltags[tag]))
url = attrs.get(self.urltags[tag])
if tag == "a":
if token.type != "startendtag":
@@ -330,9 +332,18 @@
"""Return whether the current response contains HTML data."""
if self._response is None:
raise BrowserStateError("not viewing any document")
- ct = self._response.info().getheaders("content-type")
- return ct and (ct[0].startswith("text/html") or
- ct[0].startswith("text/xhtml"))
+ ct_hdrs = self._response.info().getheaders("content-type")
+ if not ct_hdrs:
+ # guess
+ url = self._response.geturl()
+ return (url.endswith('.htm') or url.endswith('.html') or
+ url.endswith('.xhtml'))
+ # use first header
+ ct = split_header_words(ct_hdrs)[0][0][0]
+ return ct in [
+ "text/html", "text/xhtml", "text/xml",
+ "application/xml", "application/xhtml+xml",
+ ]
def title(self):
"""Return title, or None if there is no title element in the document.
@@ -357,7 +368,7 @@
def select_form(self, name=None, predicate=None, nr=None):
"""Select an HTML form for input.
- This is like giving a form the "input focus" in a browser.
+ This is a bit like giving a form the "input focus" in a browser.
If a form is selected, the object supports the HTMLForm interface, so
you can call methods like .set_value(), .set(), and .click().
@@ -524,9 +535,11 @@
if form is not None:
try: return getattr(form, name)
except AttributeError: pass
- raise AttributeError("%s instance has no attribute %s "
- "(perhaps you forgot to .select_form()?" %
- (self.__class__, name))
+
+ msg = "%s instance has no attribute %s " % (self.__class__, name)
+ if form is None:
+ msg += "(perhaps you forgot to .select_form()?)"
+ raise AttributeError(msg)
#---------------------------------------------------
# Private methods.
@@ -601,9 +614,10 @@
return self.default_encoding
def _parse_html(self, response):
+ # this is now lazy, so we just reset the various attributes that
+ # result from parsing
self.form = None
self._title = None
if not self.viewing_html():
- # nothing to see here
return
self._forms = self._links = None
Modified: wwwsearch/mechanize/trunk/test.py
==============================================================================
--- wwwsearch/mechanize/trunk/test.py (original)
+++ wwwsearch/mechanize/trunk/test.py Sun Oct 30 16:54:18 2005
@@ -180,6 +180,46 @@
self.assert_(b.back(2) is r5)
self.assertRaises(mechanize.BrowserStateError, b.back, 2)
+ def test_viewing_html(self):
+ # XXX not testing multiple Content-Type headers
+ import mechanize
+ url = "http://example.com/"
+
+ for ct, isHtml in [
+ (None, False),
+ ("text/plain", False),
+ ("text/html", True),
+ ("text/xhtml", True),
+ ("text/xml", True),
+ ("application/xml", True),
+ ("application/xhtml+xml", True),
+ ("text/html; charset=blah", True),
+ (" text/xml ; charset=ook ", True),
+ ]:
+ b = TestBrowser()
+ hdrs = {}
+ if ct is not None:
+ hdrs["Content-Type"] = ct
+ b.add_handler(MockHandler([("http_open",
+ MockResponse(url, "", hdrs))]))
+ r = b.open(url)
+ self.assertEqual(b.viewing_html(), isHtml)
+
+ for ext, isHtml in [
+ (".htm", True),
+ (".html", True),
+ (".xhtml", True),
+ (".txt", False),
+ (".xml", False), # XXX is this sensible?
+ ("", False),
+ ]:
+ b = TestBrowser()
+ url = "http://example.com/foo"+ext
+ b.add_handler(MockHandler(
+ [("http_open", MockResponse(url, "", {}))]))
+ r = b.open(url)
+ self.assertEqual(b.viewing_html(), isHtml)
+
def test_empty(self):
import mechanize
url = "http://example.com/"
More information about the wwwsearch-commits mailing list