[wwwsearch-commits] r37045 - wwwsearch/ClientForm/trunk
jjlee at codespeak.net
jjlee at codespeak.net
Sat Jan 20 01:09:30 CET 2007
Author: jjlee
Date: Sat Jan 20 01:09:28 2007
New Revision: 37045
Modified:
wwwsearch/ClientForm/trunk/ClientForm.py
wwwsearch/ClientForm/trunk/test.py
Log:
Fix entity ref double-decoding bug (dmoews at fastmail.fm, bshanks at ucsd.edu)
Modified: wwwsearch/ClientForm/trunk/ClientForm.py
==============================================================================
--- wwwsearch/ClientForm/trunk/ClientForm.py (original)
+++ wwwsearch/ClientForm/trunk/ClientForm.py Sat Jan 20 01:09:28 2007
@@ -488,7 +488,7 @@
debug("%s", attrs)
for key, value in attrs:
if key == "href":
- self.base = value
+ self.base = self.unescape_attr_if_required(value)
def end_body(self):
debug("")
@@ -508,14 +508,14 @@
d = {}
for key, value in attrs:
if key == "name":
- name = value
+ name = self.unescape_attr_if_required(value)
elif key == "action":
- action = value
+ action = self.unescape_attr_if_required(value)
elif key == "method":
- method = value.upper()
+ method = self.unescape_attr_if_required(value.upper())
elif key == "enctype":
- enctype = value.lower()
- d[key] = value
+ enctype = self.unescape_attr_if_required(value.lower())
+ d[key] = self.unescape_attr_if_required(value)
controls = []
self._current_form = (name, action, method, enctype), d, controls
@@ -536,7 +536,7 @@
raise ParseError("SELECT inside TEXTAREA")
d = {}
for key, val in attrs:
- d[key] = val
+ d[key] = self.unescape_attr_if_required(val)
self._select = d
self._add_label(d)
@@ -561,7 +561,7 @@
raise ParseError("OPTGROUP outside of SELECT")
d = {}
for key, val in attrs:
- d[key] = val
+ d[key] = self.unescape_attr_if_required(val)
self._optgroup = d
@@ -580,7 +580,7 @@
d = {}
for key, val in attrs:
- d[key] = val
+ d[key] = self.unescape_attr_if_required(val)
self._option = {}
self._option.update(d)
@@ -619,7 +619,7 @@
raise ParseError("TEXTAREA inside SELECT")
d = {}
for key, val in attrs:
- d[key] = val
+ d[key] = self.unescape_attr_if_required(val)
self._add_label(d)
self._textarea = d
@@ -641,7 +641,7 @@
self.end_label()
d = {}
for key, val in attrs:
- d[key] = val
+ d[key] = self.unescape_attr_if_required(val)
taken = bool(d.get("for")) # empty id is invalid
d["__text"] = ""
d["__taken"] = taken
@@ -709,7 +709,7 @@
d = {}
d["type"] = "submit" # default
for key, val in attrs:
- d[key] = val
+ d[key] = self.unescape_attr_if_required(val)
controls = self._current_form[2]
type = d["type"]
@@ -727,7 +727,7 @@
d = {}
d["type"] = "text" # default
for key, val in attrs:
- d[key] = val
+ d[key] = self.unescape_attr_if_required(val)
controls = self._current_form[2]
type = d["type"]
@@ -739,7 +739,7 @@
debug("%s", attrs)
d = {}
for key, val in attrs:
- d[key] = val
+ d[key] = self.unescape_attr_if_required(val)
controls = self._current_form[2]
self._add_label(d)
@@ -1113,9 +1113,6 @@
action = base_uri
else:
action = _urljoin(base_uri, action)
- action = fp.unescape_attr_if_required(action)
- name = fp.unescape_attr_if_required(name)
- attrs = fp.unescape_attrs_if_required(attrs)
# would be nice to make HTMLForm class (form builder) pluggable
form = HTMLForm(
action, method, enctype, name, attrs, request_class,
@@ -1124,8 +1121,6 @@
form._urlunparse = _urlunparse
for ii in range(len(controls)):
type, name, attrs = controls[ii]
- attrs = fp.unescape_attrs_if_required(attrs)
- name = fp.unescape_attr_if_required(name)
# index=ii*10 allows ImageControl to return multiple ordered pairs
form.new_control(type, name, attrs, select_default=select_default,
index=ii*10)
Modified: wwwsearch/ClientForm/trunk/test.py
==============================================================================
--- wwwsearch/ClientForm/trunk/test.py (original)
+++ wwwsearch/ClientForm/trunk/test.py Sat Jan 20 01:09:28 2007
@@ -135,19 +135,38 @@
def test_unescape_parsing(self):
file = StringIO(
-"""<form action="&amp;&mdash;&#x2014;&#8212;">
-<textarea name="name&amp;&mdash;&#x2014;&#8212;">val&amp;&mdash;&#x2014;&#8212;</textarea>
+"""<form action="&amp;amp;&mdash;&#x2014;&#8212;">
+<textarea name="name&amp;amp;&mdash;&#x2014;&#8212;">val&amp;amp;&mdash;&#x2014;&#8212;</textarea>
</form>
""") #"
forms = ClientForm.ParseFile(file, "http://localhost/",
backwards_compat=False, encoding="utf-8")
form = forms[0]
- test_string = "&"+(u"\u2014".encode('utf8')*3)
+ test_string = "&amp;"+(u"\u2014".encode('utf8')*3)
self.assertEqual(form.action, "http://localhost/"+test_string)
control = form.find_control(type="textarea", nr=0)
self.assertEqual(control.value, "val"+test_string)
self.assertEqual(control.name, "name"+test_string)
+ def test_unescape_parsing_select(self):
+ f = StringIO("""\
+<form>
+<select name="a">
+ <option>1&amp;amp;&mdash;&#x2014;&#8212;</option>
+ <option value="2&amp;amp;&mdash;&#x2014;&#8212;">2&amp;amp;&mdash;&#x2014;&#8212;</option>
+</select>
+</form>
+""") #"
+ forms = ClientForm.ParseFileEx(
+ f, "http://localhost/", encoding="utf-8")
+ form = forms[1]
+ test_string = "&amp;"+(u"\u2014".encode('utf8')*3)
+ control = form.find_control(nr=0)
+ for ii in range(len(control.items)):
+ item = control.items[ii]
+ self.assertEqual(item.name, str(ii+1)+test_string)
+ # XXX label
+
def test_unescape_parsing_data(self):
file = StringIO(
"""\
@@ -403,10 +422,7 @@
entity_ctl = form.find_control(type="textarea", nr=2)
self.assertEqual(entity_ctl.name, '"ta"')
self.assertEqual(entity_ctl.attrs["id"], "foo&bar")
-
- # sgmllib gets this wrong (unescapes twice) (module HTMLParser does
- # it right)
- #self.assertEqual(entity_ctl.value, "Hello testers & users!")
+ self.assertEqual(entity_ctl.value, "Hello testers & users!")
def testSelect(self):
file = StringIO(
More information about the wwwsearch-commits mailing list