[wwwsearch-commits] r33024 - wwwsearch/ClientForm/trunk
jjlee at codespeak.net
jjlee at codespeak.net
Mon Oct 9 00:36:56 CEST 2006
Author: jjlee
Date: Mon Oct 9 00:36:53 2006
New Revision: 33024
Modified:
wwwsearch/ClientForm/trunk/ClientForm.py
Log:
Allow mechanize to supply URL join / parse / unparse functions, so that mechanize can follow RFC 3986, thus fixing some URL processing bugs. ClientForm should do the same; I should probably merge the two projects after the final mechanize release.
Modified: wwwsearch/ClientForm/trunk/ClientForm.py
==============================================================================
--- wwwsearch/ClientForm/trunk/ClientForm.py (original)
+++ wwwsearch/ClientForm/trunk/ClientForm.py Mon Oct 9 00:36:53 2006
@@ -104,7 +104,6 @@
import sys, urllib, urllib2, types, mimetools, copy, urlparse, \
htmlentitydefs, re, random
-from urlparse import urljoin
from cStringIO import StringIO
try:
@@ -850,6 +849,11 @@
entitydefs=None,
backwards_compat=True,
encoding=DEFAULT_ENCODING,
+
+ # private
+ _urljoin=urlparse.urljoin,
+ _urlparse=urlparse.urlparse,
+ _urlunparse=urlparse.urlunparse,
):
"""Parse HTTP response and return a list of HTMLForm instances.
@@ -917,6 +921,9 @@
entitydefs,
backwards_compat,
encoding,
+ _urljoin=_urljoin,
+ _urlparse=_urlparse,
+ _urlunparse=_urlunparse,
)
def ParseFile(file, base_uri, select_default=False,
@@ -926,6 +933,13 @@
entitydefs=None,
backwards_compat=True,
encoding=DEFAULT_ENCODING,
+
+ # these private arguments are here as a hack to allow mechanize
+ # to follow RFC 3986. ClientForm should do the same really --
+ # perhaps it's time to merge ClientForm with mechanize...
+ _urljoin=urlparse.urljoin,
+ _urlparse=urlparse.urlparse,
+ _urlunparse=urlparse.urlunparse,
):
"""Parse HTML and return a list of HTMLForm instances.
@@ -970,7 +984,7 @@
if action is None:
action = base_uri
else:
- action = urljoin(base_uri, action)
+ action = _urljoin(base_uri, action)
action = fp.unescape_attr_if_required(action)
name = fp.unescape_attr_if_required(name)
attrs = fp.unescape_attrs_if_required(attrs)
@@ -978,6 +992,8 @@
form = HTMLForm(
action, method, enctype, name, attrs, request_class,
forms, labels, id_to_labels, backwards_compat)
+ form._urlparse = _urlparse
+ form._urlunparse = _urlunparse
for ii in range(len(controls)):
type, name, attrs = controls[ii]
attrs = fp.unescape_attrs_if_required(attrs)
@@ -1171,6 +1187,9 @@
self._clicked = False
+ self._urlparse = urlparse.urlparse
+ self._urlunparse = urlparse.urlunparse
+
def __getattr__(self, name):
if name == "value":
return self.__dict__["_value"]
@@ -1379,10 +1398,10 @@
# This doesn't seem to be specified in HTML 4.01 spec. (ISINDEX is
# deprecated in 4.01, but it should still say how to submit it).
# Submission of ISINDEX is explained in the HTML 3.2 spec, though.
- parts = urlparse.urlparse(form.action)
+ parts = self._urlparse(form.action)
rest, (query, frag) = parts[:-2], parts[-2:]
parts = rest + (urllib.quote_plus(self.value), "")
- url = urlparse.urlunparse(parts)
+ url = self._urlunparse(parts)
req_data = url, None, []
if return_type == "pairs":
@@ -2616,6 +2635,9 @@
self.backwards_compat = backwards_compat # note __setattr__
+ self._urlunparse = urlparse.urlunparse
+ self._urlparse = urlparse.urlparse
+
def __getattr__(self, name):
if name == "backwards_compat":
return self._backwards_compat
@@ -2674,6 +2696,8 @@
else:
control = klass(type, name, a, index)
control.add_to_form(self)
+ control._urlparse = self._urlparse
+ control._urlunparse = self._urlunparse
def fixup(self):
"""Normalise form after all controls have been added.
@@ -3162,7 +3186,7 @@
"""Return a tuple (url, data, headers)."""
method = self.method.upper()
#scheme, netloc, path, parameters, query, frag = urlparse.urlparse(self.action)
- parts = urlparse.urlparse(self.action)
+ parts = self._urlparse(self.action)
rest, (query, frag) = parts[:-2], parts[-2:]
if method == "GET":
@@ -3170,11 +3194,11 @@
raise ValueError(
"unknown GET form encoding type '%s'" % self.enctype)
parts = rest + (urlencode(self._pairs()), "")
- uri = urlparse.urlunparse(parts)
+ uri = self._urlunparse(parts)
return uri, None, []
elif method == "POST":
parts = rest + (query, "")
- uri = urlparse.urlunparse(parts)
+ uri = self._urlunparse(parts)
if self.enctype == "application/x-www-form-urlencoded":
return (uri, urlencode(self._pairs()),
[("Content-type", self.enctype)])
More information about the wwwsearch-commits
mailing list