[wwwsearch-commits] r33024 - wwwsearch/ClientForm/trunk

jjlee at codespeak.net jjlee at codespeak.net
Mon Oct 9 00:36:56 CEST 2006


Author: jjlee
Date: Mon Oct  9 00:36:53 2006
New Revision: 33024

Modified:
   wwwsearch/ClientForm/trunk/ClientForm.py
Log:
Allow mechanize to supply URL join / parse / unparse functions, so that mechanize can follow RFC 3986, thus fixing some URL processing bugs.  ClientForm should do the same; I should probably merge the two projects after the final mechanize release.

Modified: wwwsearch/ClientForm/trunk/ClientForm.py
==============================================================================
--- wwwsearch/ClientForm/trunk/ClientForm.py	(original)
+++ wwwsearch/ClientForm/trunk/ClientForm.py	Mon Oct  9 00:36:53 2006
@@ -104,7 +104,6 @@
 
 import sys, urllib, urllib2, types, mimetools, copy, urlparse, \
        htmlentitydefs, re, random
-from urlparse import urljoin
 from cStringIO import StringIO
 
 try:
@@ -850,6 +849,11 @@
                   entitydefs=None,
                   backwards_compat=True,
                   encoding=DEFAULT_ENCODING,
+
+                  # private
+                  _urljoin=urlparse.urljoin,
+                  _urlparse=urlparse.urlparse,
+                  _urlunparse=urlparse.urlunparse,
                   ):
     """Parse HTTP response and return a list of HTMLForm instances.
 
@@ -917,6 +921,9 @@
                      entitydefs,
                      backwards_compat,
                      encoding,
+                     _urljoin=_urljoin,
+                     _urlparse=_urlparse,
+                     _urlunparse=_urlunparse,
                      )
 
 def ParseFile(file, base_uri, select_default=False,
@@ -926,6 +933,13 @@
               entitydefs=None,
               backwards_compat=True,
               encoding=DEFAULT_ENCODING,
+
+              # these private arguments are here as a hack to allow mechanize
+              # to follow RFC 3986.  ClientForm should do the same really --
+              # perhaps it's time to merge ClientForm with mechanize...
+              _urljoin=urlparse.urljoin,
+              _urlparse=urlparse.urlparse,
+              _urlunparse=urlparse.urlunparse,
               ):
     """Parse HTML and return a list of HTMLForm instances.
 
@@ -970,7 +984,7 @@
         if action is None:
             action = base_uri
         else:
-            action = urljoin(base_uri, action)
+            action = _urljoin(base_uri, action)
         action = fp.unescape_attr_if_required(action)
         name = fp.unescape_attr_if_required(name)
         attrs = fp.unescape_attrs_if_required(attrs)
@@ -978,6 +992,8 @@
         form = HTMLForm(
             action, method, enctype, name, attrs, request_class,
             forms, labels, id_to_labels, backwards_compat)
+        form._urlparse = _urlparse
+        form._urlunparse = _urlunparse
         for ii in range(len(controls)):
             type, name, attrs = controls[ii]
             attrs = fp.unescape_attrs_if_required(attrs)
@@ -1171,6 +1187,9 @@
 
         self._clicked = False
 
+        self._urlparse = urlparse.urlparse
+        self._urlunparse = urlparse.urlunparse
+
     def __getattr__(self, name):
         if name == "value":
             return self.__dict__["_value"]
@@ -1379,10 +1398,10 @@
         # This doesn't seem to be specified in HTML 4.01 spec. (ISINDEX is
         # deprecated in 4.01, but it should still say how to submit it).
         # Submission of ISINDEX is explained in the HTML 3.2 spec, though.
-        parts = urlparse.urlparse(form.action)
+        parts = self._urlparse(form.action)
         rest, (query, frag) = parts[:-2], parts[-2:]
         parts = rest + (urllib.quote_plus(self.value), "")
-        url = urlparse.urlunparse(parts)
+        url = self._urlunparse(parts)
         req_data = url, None, []
 
         if return_type == "pairs":
@@ -2616,6 +2635,9 @@
 
         self.backwards_compat = backwards_compat  # note __setattr__
 
+        self._urlunparse = urlparse.urlunparse
+        self._urlparse = urlparse.urlparse
+
     def __getattr__(self, name):
         if name == "backwards_compat":
             return self._backwards_compat
@@ -2674,6 +2696,8 @@
         else:
             control = klass(type, name, a, index)
         control.add_to_form(self)
+        control._urlparse = self._urlparse
+        control._urlunparse = self._urlunparse
 
     def fixup(self):
         """Normalise form after all controls have been added.
@@ -3162,7 +3186,7 @@
         """Return a tuple (url, data, headers)."""
         method = self.method.upper()
         #scheme, netloc, path, parameters, query, frag = urlparse.urlparse(self.action)
-        parts = urlparse.urlparse(self.action)
+        parts = self._urlparse(self.action)
         rest, (query, frag) = parts[:-2], parts[-2:]
 
         if method == "GET":
@@ -3170,11 +3194,11 @@
                 raise ValueError(
                     "unknown GET form encoding type '%s'" % self.enctype)
             parts = rest + (urlencode(self._pairs()), "")
-            uri = urlparse.urlunparse(parts)
+            uri = self._urlunparse(parts)
             return uri, None, []
         elif method == "POST":
             parts = rest + (query, "")
-            uri = urlparse.urlunparse(parts)
+            uri = self._urlunparse(parts)
             if self.enctype == "application/x-www-form-urlencoded":
                 return (uri, urlencode(self._pairs()),
                         [("Content-type", self.enctype)])


More information about the wwwsearch-commits mailing list