[wwwsearch-commits] r26688 - wwwsearch/mechanize/branch/mechanize-0.1.0-devel/mechanize
jjlee at codespeak.net
jjlee at codespeak.net
Wed May 3 00:30:55 CEST 2006
Author: jjlee
Date: Wed May 3 00:30:54 2006
New Revision: 26688
Modified:
wwwsearch/mechanize/branch/mechanize-0.1.0-devel/mechanize/_html.py
Log:
Fix some BeautifulSoup stuff and paper over encoding cracks
Modified: wwwsearch/mechanize/branch/mechanize-0.1.0-devel/mechanize/_html.py
==============================================================================
--- wwwsearch/mechanize/branch/mechanize-0.1.0-devel/mechanize/_html.py (original)
+++ wwwsearch/mechanize/branch/mechanize-0.1.0-devel/mechanize/_html.py Wed May 3 00:30:54 2006
@@ -365,7 +365,7 @@
attrs = bs.unescape_attrs(link.attrs)
attrs_dict = dict(attrs)
if link.name == "base":
- base_url = attrs_dict.get("href").encode(encoding)
+ base_url = attrs_dict.get("href")
continue
url_attr = self.urltags[link.name]
url = attrs_dict.get(url_attr)
@@ -381,9 +381,7 @@
text = None
else:
text = self.compress_re.sub(" ", text.strip())
- text = text.encode(encoding)
- linkname = link.name.encode(encoding)
- yield Link(base_url, url, text, linkname, attrs)
+ yield Link(base_url, url, text, link.name, attrs)
class RobustFormsFactory(FormsFactory):
@@ -547,7 +545,8 @@
DefaultFactory.
"""
- def __init__(self, i_want_broken_xhtml_support=False):
+ def __init__(self, i_want_broken_xhtml_support=False,
+ soup_class=MechanizeBs):
Factory.__init__(
self,
forms_factory=RobustFormsFactory(),
@@ -555,14 +554,14 @@
title_factory=RobustTitleFactory(),
is_html_p=make_is_html(allow_xhtml=i_want_broken_xhtml_support),
)
- self._bs = None
+ self._soup_class = soup_class
def set_response(self, response):
import BeautifulSoup
Factory.set_response(self, response)
if response is not None:
data = response.read()
- self._bs = self.link_parser_class(self.encoding, data)
+ soup = self._soup_class(self.encoding, data)
self._forms_factory.set_response(response, self.encoding)
self._links_factory.set_soup(
soup, response.geturl(), self.encoding)
More information about the wwwsearch-commits mailing list