[wwwsearch-commits] r19185 - wwwsearch/ClientCookie/trunk/ClientCookie
jjlee at codespeak.net
jjlee at codespeak.net
Sun Oct 30 17:06:24 CET 2005
Author: jjlee
Date: Sun Oct 30 17:06:23 2005
New Revision: 19185
Modified:
wwwsearch/ClientCookie/trunk/ClientCookie/_HeadersUtil.py
wwwsearch/ClientCookie/trunk/ClientCookie/_urllib2_support.py
Log:
Move is_html() code here from mechanize
Modified: wwwsearch/ClientCookie/trunk/ClientCookie/_HeadersUtil.py
==============================================================================
--- wwwsearch/ClientCookie/trunk/ClientCookie/_HeadersUtil.py (original)
+++ wwwsearch/ClientCookie/trunk/ClientCookie/_HeadersUtil.py Sun Oct 30 17:06:23 2005
@@ -24,6 +24,23 @@
True = 1
False = 0
+# Decide whether a response should be treated as HTML/XML markup, using its
+# Content-Type headers when present and the URL's file extension otherwise.
+def is_html(ct_headers, url):
+ """
+ ct_headers: Sequence of Content-Type headers
+ url: Response URL
+
+ """
+ if not ct_headers:
+ # no Content-Type header at all: guess from the URL's file extension
+ return (url.endswith('.htm') or url.endswith('.html') or
+ url.endswith('.xhtml'))
+ # use first header only; [0][0][0] picks the name of its first parsed
+ # element, presumably the bare media type without parameters -- confirm
+ # against split_header_words
+ ct = split_header_words(ct_headers)[0][0][0]
+ # exact match against the accepted HTML and XML media types
+ return ct in [
+ "text/html", "text/xhtml", "text/xml",
+ "application/xml", "application/xhtml+xml",
+ ]
+
def unmatched(match):
"""Return unmatched part of re.Match object."""
start, end = match.span(0)
Modified: wwwsearch/ClientCookie/trunk/ClientCookie/_urllib2_support.py
==============================================================================
--- wwwsearch/ClientCookie/trunk/ClientCookie/_urllib2_support.py (original)
+++ wwwsearch/ClientCookie/trunk/ClientCookie/_urllib2_support.py Sun Oct 30 17:06:23 2005
@@ -16,6 +16,7 @@
import ClientCookie
from _ClientCookie import CookieJar, request_host
from _Util import isstringlike, startswith, getheaders
+from _HeadersUtil import is_html
from _Debug import getLogger
try: True
@@ -306,10 +307,9 @@
if not hasattr(response, "seek"):
response = response_seek_wrapper(response)
headers = response.info()
- ct = getheaders(response.info(), "content-type")
- html = ct and (ct[0].startswith("text/html") or
- ct[0].startswith("text/xhtml"))
- if html:
+ url = response.geturl()
+ ct_hdrs = getheaders(response.info(), "content-type")
+ if is_html(ct_hdrs, url):
try:
try:
html_headers = parse_head(response, self.head_parser_class())
More information about the wwwsearch-commits mailing list