[wwwsearch-commits] r26607 - in wwwsearch/mechanize/branch/mechanize-0.1.0-devel: . mechanize

jjlee at codespeak.net jjlee at codespeak.net
Sun Apr 30 17:46:25 CEST 2006


Author: jjlee
Date: Sun Apr 30 17:46:22 2006
New Revision: 26607

Modified:
   wwwsearch/mechanize/branch/mechanize-0.1.0-devel/mechanize/__init__.py
   wwwsearch/mechanize/branch/mechanize-0.1.0-devel/mechanize/_useragent.py
   wwwsearch/mechanize/branch/mechanize-0.1.0-devel/test.py
Log:
Improve auth support

Modified: wwwsearch/mechanize/branch/mechanize-0.1.0-devel/mechanize/__init__.py
==============================================================================
--- wwwsearch/mechanize/branch/mechanize-0.1.0-devel/mechanize/__init__.py	(original)
+++ wwwsearch/mechanize/branch/mechanize-0.1.0-devel/mechanize/__init__.py	Sun Apr 30 17:46:22 2006
@@ -1,4 +1,4 @@
-from _useragent import UserAgent
+from _useragent import UserAgent, HTTPProxyPasswordMgr
 from _mechanize import Browser, \
      BrowserStateError, LinkNotFoundError, FormNotFoundError, \
      __version__

Modified: wwwsearch/mechanize/branch/mechanize-0.1.0-devel/mechanize/_useragent.py
==============================================================================
--- wwwsearch/mechanize/branch/mechanize-0.1.0-devel/mechanize/_useragent.py	(original)
+++ wwwsearch/mechanize/branch/mechanize-0.1.0-devel/mechanize/_useragent.py	Sun Apr 30 17:46:22 2006
@@ -34,6 +34,51 @@
     https_request = http_request
 
 
+class HTTPProxyPasswordMgr(urllib2.HTTPPasswordMgr):
+    # has default realm and host/port
+    def add_password(self, realm, uri, user, passwd):
+        # uri could be a single URI or a sequence
+        if uri is None or isinstance(uri, basestring):
+            uris = [uri]
+        else:
+            uris = uri
+        passwd_by_domain = self.passwd.setdefault(realm, {})
+        for uri in uris:
+            uri = self.reduce_uri(uri)
+            passwd_by_domain[uri] = (user, passwd)
+
+    def find_user_password(self, realm, authuri):
+        perms = [(realm, authuri), (None, authuri)]
+        # bleh, want default realm to take precedence over default
+        # URI/authority, hence this outer loop
+        for default_uri in False, True:
+            for realm, authuri in perms:
+                authinfo_by_domain = self.passwd.get(realm, {})
+                reduced_authuri = self.reduce_uri(authuri)
+                for uri, authinfo in authinfo_by_domain.iteritems():
+                    if uri is None and not default_uri:
+                        continue
+                    if self.is_suburi(uri, reduced_authuri):
+                        return authinfo
+                user, password = None, None
+
+                if user is not None:
+                    break
+        return user, password
+
+    def reduce_uri(self, uri):
+        if uri is None:
+            return None
+        return urllib2.HTTPPasswordMgr.reduce_uri(self, uri)
+
+    def is_suburi(self, base, test):
+        if base is None:
+            # default to the proxy's host/port
+            hostport, path = test
+            base = (hostport, "/")
+        return urllib2.HTTPPasswordMgr.is_suburi(self, base, test)
+
+
 class UserAgent(OpenerDirector):
     """Convenient user-agent class.
 
@@ -58,7 +103,6 @@
         "ftp": urllib2.FTPHandler,  # CacheFTPHandler is buggy in 2.3
         "file": urllib2.FileHandler,
         "gopher": urllib2.GopherHandler,
-        # XXX etc.
 
         # other handlers
         "_unknown": urllib2.UnknownHandler,
@@ -68,8 +112,8 @@
         "_http_default_error": urllib2.HTTPDefaultErrorHandler,
 
         # feature handlers
-        "_authen": urllib2.HTTPBasicAuthHandler,
-        # XXX rest of authentication stuff
+        "_basicauth": urllib2.HTTPBasicAuthHandler,
+        "_digestauth": urllib2.HTTPBasicAuthHandler,
         "_redirect": ClientCookie.HTTPRedirectHandler,
         "_cookies": ClientCookie.HTTPCookieProcessor,
         "_refresh": ClientCookie.HTTPRefreshProcessor,
@@ -77,7 +121,8 @@
         "_equiv": ClientCookie.HTTPEquivProcessor,
         "_seek": ClientCookie.SeekableProcessor,
         "_proxy": urllib2.ProxyHandler,
-        # XXX there's more to proxies, too
+        "_proxy_basicauth": urllib2.ProxyBasicAuthHandler,
+        "_proxy_digestauth": urllib2.ProxyDigestAuthHandler,
 
         # debug handlers
         "_debug_redirect": ClientCookie.HTTPRedirectDebugProcessor,
@@ -86,9 +131,14 @@
 
     default_schemes = ["http", "ftp", "file", "gopher"]
     default_others = ["_unknown", "_http_error", "_http_request_upgrade",
-                      "_http_default_error"]
-    default_features = ["_authen", "_redirect", "_cookies", "_refresh",
-                        "_referer", "_equiv", "_seek", "_proxy"]
+                      "_http_default_error",
+                      ]
+    default_features = ["_redirect", "_cookies", "_referer",
+                        "_refresh", "_equiv",
+                        "_basicauth", "_digestauth",
+                        "_proxy", "_proxy_basicauth", "_proxy_digestauth",
+                        "_seek",
+                        ]
     if hasattr(httplib, 'HTTPS'):
         handler_classes["https"] = ClientCookie.HTTPSHandler
         default_schemes.append("https")
@@ -99,21 +149,31 @@
     def __init__(self):
         OpenerDirector.__init__(self)
 
-        self._ua_handlers = {}
+        ua_handlers = self._ua_handlers = {}
         for scheme in (self.default_schemes+
                        self.default_others+
                        self.default_features):
             klass = self.handler_classes[scheme]
-            self._ua_handlers[scheme] = klass()
-        for handler in self._ua_handlers.itervalues():
+            ua_handlers[scheme] = klass()
+        for handler in ua_handlers.itervalues():
             self.add_handler(handler)
 
+        # Yuck.
         # Ensure correct default constructor args were passed to
-        # HTTPRefererProcessor and HTTPEquivProcessor.  Yuck.
-        if '_refresh' in self._ua_handlers:
+        # HTTPRefreshProcessor and HTTPEquivProcessor.
+        if "_refresh" in ua_handlers:
             self.set_handle_refresh(True)
-        if '_equiv' in self._ua_handlers:
+        if "_equiv" in ua_handlers:
             self.set_handle_equiv(True)
+        # Ensure default password managers are installed.
+        pm = ppm = None
+        if "_basicauth" in ua_handlers or "_digestauth" in ua_handlers:
+            pm = urllib2.HTTPPasswordMgrWithDefaultRealm()
+        if ("_proxy_basicauth" in ua_handlers or
+            "_proxy_digestauth" in ua_handlers):
+            ppm = HTTPProxyPasswordMgr()
+        self.set_credentials(pm)
+        self.set_proxy_credentials(ppm)
 
         # special case, requires extra support from mechanize.Browser
         self._handle_referer = True
@@ -165,10 +225,35 @@
     def set_cookiejar(self, cookiejar):
         """Set a ClientCookie.CookieJar, or None."""
         self._set_handler("_cookies", obj=cookiejar)
+
+    # XXX could use Greg Stein's httpx for some of this instead?
+    # or httplib2??
+    def set_proxies(self, proxies):
+        """Set a dictionary mapping URL scheme to proxy specification, or None.
+
+        e.g. {'http': 'myproxy.example.com',
+              'ftp': 'joe:password@proxy.example.com:8080'}
+
+        """
+        self._set_handler("_proxy", obj=proxies)
+
+    def add_password(self, url, user, password, realm=None):
+        self._credentials.add_password(realm, url, user, password)
+    def add_proxy_password(self, user, password, hostport=None, realm=None):
+        self._proxy_credentials.add_password(realm, hostport, user, password)
+
+    # the following are rarely useful -- use add_password / add_proxy_password
+    # instead
     def set_credentials(self, credentials):
-        """Set a urllib2.HTTPPasswordMgr, or None."""
-        # XXX use Greg Stein's httpx instead?
-        self._set_handler("_authen", obj=credentials)
+        """Set a urllib2.HTTPPasswordMgrWithDefaultRealm, or None."""
+        self._credentials = credentials
+        self._set_handler("_basicauth", obj=credentials)
+        self._set_handler("_digestauth", obj=credentials)
+    def set_proxy_credentials(self, credentials):
+        """Set a mechanize.HTTPProxyPasswordMgr, or None."""
+        self._proxy_credentials = credentials
+        self._set_handler("_proxy_basicauth", obj=credentials)
+        self._set_handler("_proxy_digestauth", obj=credentials)
 
     # these methods all take a boolean parameter
     def set_handle_robots(self, handle):

Modified: wwwsearch/mechanize/branch/mechanize-0.1.0-devel/test.py
==============================================================================
--- wwwsearch/mechanize/branch/mechanize-0.1.0-devel/test.py	(original)
+++ wwwsearch/mechanize/branch/mechanize-0.1.0-devel/test.py	Sun Apr 30 17:46:22 2006
@@ -19,6 +19,104 @@
     FACTORY_CLASSES.append(mechanize.RobustFactory)
 
 
+def test_password_manager(self):
+    """
+    >>> mgr = mechanize.HTTPProxyPasswordMgr()
+    >>> add = mgr.add_password
+
+    >>> add("Some Realm", "http://example.com/", "joe", "password")
+    >>> add("Some Realm", "http://example.com/ni", "ni", "ni")
+    >>> add("c", "http://example.com/foo", "foo", "ni")
+    >>> add("c", "http://example.com/bar", "bar", "nini")
+    >>> add("b", "http://example.com/", "first", "blah")
+    >>> add("b", "http://example.com/", "second", "spam")
+    >>> add("a", "http://example.com", "1", "a")
+    >>> add("Some Realm", "http://c.example.com:3128", "3", "c")
+    >>> add("Some Realm", "d.example.com", "4", "d")
+    >>> add("Some Realm", "e.example.com:3128", "5", "e")
+
+    >>> mgr.find_user_password("Some Realm", "example.com")
+    ('joe', 'password')
+    >>> mgr.find_user_password("Some Realm", "http://example.com")
+    ('joe', 'password')
+    >>> mgr.find_user_password("Some Realm", "http://example.com/")
+    ('joe', 'password')
+    >>> mgr.find_user_password("Some Realm", "http://example.com/spam")
+    ('joe', 'password')
+    >>> mgr.find_user_password("Some Realm", "http://example.com/spam/spam")
+    ('joe', 'password')
+    >>> mgr.find_user_password("c", "http://example.com/foo")
+    ('foo', 'ni')
+    >>> mgr.find_user_password("c", "http://example.com/bar")
+    ('bar', 'nini')
+
+    Currently, where more than one entry matches, we use the highest-level path:
+
+    >>> mgr.find_user_password("Some Realm", "http://example.com/ni")
+    ('joe', 'password')
+
+    Use latest add_password() in case of conflict:
+
+    >>> mgr.find_user_password("b", "http://example.com/")
+    ('second', 'spam')
+
+    No special relationship between a.example.com and example.com:
+
+    >>> mgr.find_user_password("a", "http://example.com/")
+    ('1', 'a')
+    >>> mgr.find_user_password("a", "http://a.example.com/")
+    (None, None)
+
+    Ports:
+
+    >>> mgr.find_user_password("Some Realm", "c.example.com")
+    (None, None)
+    >>> mgr.find_user_password("Some Realm", "c.example.com:3128")
+    ('3', 'c')
+    >>> mgr.find_user_password("Some Realm", "http://c.example.com:3128")
+    ('3', 'c')
+    >>> mgr.find_user_password("Some Realm", "d.example.com")
+    ('4', 'd')
+    >>> mgr.find_user_password("Some Realm", "e.example.com:3128")
+    ('5', 'e')
+
+
+    Now features specific to HTTPProxyPasswordMgr.
+
+    Default realm:
+
+    >>> mgr.find_user_password("d", "f.example.com")
+    (None, None)
+    >>> add(None, "f.example.com", "6", "f")
+    >>> mgr.find_user_password("d", "f.example.com")
+    ('6', 'f')
+
+    Default host/port:
+
+    >>> mgr.find_user_password("e", "g.example.com")
+    (None, None)
+    >>> add("e", None, "7", "g")
+    >>> mgr.find_user_password("e", "g.example.com")
+    ('7', 'g')
+
+    Default realm and host/port:
+
+    >>> mgr.find_user_password("f", "h.example.com")
+    (None, None)
+    >>> add(None, None, "8", "h")
+    >>> mgr.find_user_password("f", "h.example.com")
+    ('8', 'h')
+
+    Default realm beats default host/port:
+
+    >>> add("d", None, "9", "i")
+    >>> mgr.find_user_password("d", "f.example.com")
+    ('6', 'f')
+
+    """
+    pass
+
+
 class CachingGeneratorFunctionTests(TestCase):
 
     def _get_simple_cgenf(self, log):
@@ -703,5 +801,7 @@
         ua._set_handler("_blah", True)
 
 if __name__ == "__main__":
+    import doctest
+    doctest.testmod()
     import unittest
     unittest.main()


More information about the wwwsearch-commits mailing list