[wwwsearch-commits] r26688 - wwwsearch/mechanize/branch/mechanize-0.1.0-devel/mechanize

jjlee at codespeak.net jjlee at codespeak.net
Wed May 3 00:30:55 CEST 2006


Author: jjlee
Date: Wed May  3 00:30:54 2006
New Revision: 26688

Modified:
   wwwsearch/mechanize/branch/mechanize-0.1.0-devel/mechanize/_html.py
Log:
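Fix some BeautifulSoup stuff (add a soup_class argument and actually create the soup object that is passed to the links factory) and paper over encoding cracks (stop encoding the base URL, link text and tag name)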

Modified: wwwsearch/mechanize/branch/mechanize-0.1.0-devel/mechanize/_html.py
==============================================================================
--- wwwsearch/mechanize/branch/mechanize-0.1.0-devel/mechanize/_html.py	(original)
+++ wwwsearch/mechanize/branch/mechanize-0.1.0-devel/mechanize/_html.py	Wed May  3 00:30:54 2006
@@ -365,7 +365,7 @@
                 attrs = bs.unescape_attrs(link.attrs)
                 attrs_dict = dict(attrs)
                 if link.name == "base":
-                    base_url = attrs_dict.get("href").encode(encoding)
+                    base_url = attrs_dict.get("href")
                     continue
                 url_attr = self.urltags[link.name]
                 url = attrs_dict.get(url_attr)
@@ -381,9 +381,7 @@
                         text = None
                 else:
                     text = self.compress_re.sub(" ", text.strip())
-                    text = text.encode(encoding)
-                linkname = link.name.encode(encoding)
-                yield Link(base_url, url, text, linkname, attrs)
+                yield Link(base_url, url, text, link.name, attrs)
 
 
 class RobustFormsFactory(FormsFactory):
@@ -547,7 +545,8 @@
     DefaultFactory.
 
     """
-    def __init__(self, i_want_broken_xhtml_support=False):
+    def __init__(self, i_want_broken_xhtml_support=False,
+                 soup_class=MechanizeBs):
         Factory.__init__(
             self,
             forms_factory=RobustFormsFactory(),
@@ -555,14 +554,14 @@
             title_factory=RobustTitleFactory(),
             is_html_p=make_is_html(allow_xhtml=i_want_broken_xhtml_support),
             )
-        self._bs = None
+        self._soup_class = soup_class
 
     def set_response(self, response):
         import BeautifulSoup
         Factory.set_response(self, response)
         if response is not None:
             data = response.read()
-            self._bs = self.link_parser_class(self.encoding, data)
+            soup = self._soup_class(self.encoding, data)
             self._forms_factory.set_response(response, self.encoding)
             self._links_factory.set_soup(
                 soup, response.geturl(), self.encoding)


More information about the wwwsearch-commits mailing list