[wwwsearch-commits] r37045 - wwwsearch/ClientForm/trunk

jjlee at codespeak.net jjlee at codespeak.net
Sat Jan 20 01:09:30 CET 2007


Author: jjlee
Date: Sat Jan 20 01:09:28 2007
New Revision: 37045

Modified:
   wwwsearch/ClientForm/trunk/ClientForm.py
   wwwsearch/ClientForm/trunk/test.py
Log:
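Fix entity reference double-decoding bug: attribute values are now unescaped once, in the parser's tag handlers, rather than again when the forms are built (dmoews at fastmail.fm, bshanks at ucsd.edu)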

Modified: wwwsearch/ClientForm/trunk/ClientForm.py
==============================================================================
--- wwwsearch/ClientForm/trunk/ClientForm.py	(original)
+++ wwwsearch/ClientForm/trunk/ClientForm.py	Sat Jan 20 01:09:28 2007
@@ -488,7 +488,7 @@
         debug("%s", attrs)
         for key, value in attrs:
             if key == "href":
-                self.base = value
+                self.base = self.unescape_attr_if_required(value)
 
     def end_body(self):
         debug("")
@@ -508,14 +508,14 @@
         d = {}
         for key, value in attrs:
             if key == "name":
-                name = value
+                name = self.unescape_attr_if_required(value)
             elif key == "action":
-                action = value
+                action = self.unescape_attr_if_required(value)
             elif key == "method":
-                method = value.upper()
+                method = self.unescape_attr_if_required(value.upper())
             elif key == "enctype":
-                enctype = value.lower()
-            d[key] = value
+                enctype = self.unescape_attr_if_required(value.lower())
+            d[key] = self.unescape_attr_if_required(value)
         controls = []
         self._current_form = (name, action, method, enctype), d, controls
 
@@ -536,7 +536,7 @@
             raise ParseError("SELECT inside TEXTAREA")
         d = {}
         for key, val in attrs:
-            d[key] = val
+            d[key] = self.unescape_attr_if_required(val)
 
         self._select = d
         self._add_label(d)
@@ -561,7 +561,7 @@
             raise ParseError("OPTGROUP outside of SELECT")
         d = {}
         for key, val in attrs:
-            d[key] = val
+            d[key] = self.unescape_attr_if_required(val)
 
         self._optgroup = d
 
@@ -580,7 +580,7 @@
 
         d = {}
         for key, val in attrs:
-            d[key] = val
+            d[key] = self.unescape_attr_if_required(val)
 
         self._option = {}
         self._option.update(d)
@@ -619,7 +619,7 @@
             raise ParseError("TEXTAREA inside SELECT")
         d = {}
         for key, val in attrs:
-            d[key] = val
+            d[key] = self.unescape_attr_if_required(val)
         self._add_label(d)
 
         self._textarea = d
@@ -641,7 +641,7 @@
             self.end_label()
         d = {}
         for key, val in attrs:
-            d[key] = val
+            d[key] = self.unescape_attr_if_required(val)
         taken = bool(d.get("for"))  # empty id is invalid
         d["__text"] = ""
         d["__taken"] = taken
@@ -709,7 +709,7 @@
         d = {}
         d["type"] = "submit"  # default
         for key, val in attrs:
-            d[key] = val
+            d[key] = self.unescape_attr_if_required(val)
         controls = self._current_form[2]
 
         type = d["type"]
@@ -727,7 +727,7 @@
         d = {}
         d["type"] = "text"  # default
         for key, val in attrs:
-            d[key] = val
+            d[key] = self.unescape_attr_if_required(val)
         controls = self._current_form[2]
 
         type = d["type"]
@@ -739,7 +739,7 @@
         debug("%s", attrs)
         d = {}
         for key, val in attrs:
-            d[key] = val
+            d[key] = self.unescape_attr_if_required(val)
         controls = self._current_form[2]
 
         self._add_label(d)
@@ -1113,9 +1113,6 @@
             action = base_uri
         else:
             action = _urljoin(base_uri, action)
-        action = fp.unescape_attr_if_required(action)
-        name = fp.unescape_attr_if_required(name)
-        attrs = fp.unescape_attrs_if_required(attrs)
         # would be nice to make HTMLForm class (form builder) pluggable
         form = HTMLForm(
             action, method, enctype, name, attrs, request_class,
@@ -1124,8 +1121,6 @@
         form._urlunparse = _urlunparse
         for ii in range(len(controls)):
             type, name, attrs = controls[ii]
-            attrs = fp.unescape_attrs_if_required(attrs)
-            name = fp.unescape_attr_if_required(name)
             # index=ii*10 allows ImageControl to return multiple ordered pairs
             form.new_control(type, name, attrs, select_default=select_default,
                              index=ii*10)

Modified: wwwsearch/ClientForm/trunk/test.py
==============================================================================
--- wwwsearch/ClientForm/trunk/test.py	(original)
+++ wwwsearch/ClientForm/trunk/test.py	Sat Jan 20 01:09:28 2007
@@ -135,19 +135,38 @@
 
     def test_unescape_parsing(self):
         file = StringIO(
-"""<form action="&amp;&mdash;&#x2014;&#8212;">
-<textarea name="name&amp;&mdash;&#x2014;&#8212;">val&amp;&mdash;&#x2014;&#8212;</textarea>
+"""<form action="&amp;amp;&mdash;&#x2014;&#8212;">
+<textarea name="name&amp;amp;&mdash;&#x2014;&#8212;">val&amp;amp;&mdash;&#x2014;&#8212;</textarea>
 </form>
 """)  #"
         forms = ClientForm.ParseFile(file, "http://localhost/",
                                      backwards_compat=False, encoding="utf-8")
         form = forms[0]
-        test_string = "&"+(u"\u2014".encode('utf8')*3)
+        test_string = "&amp;"+(u"\u2014".encode('utf8')*3)
         self.assertEqual(form.action, "http://localhost/"+test_string)
         control = form.find_control(type="textarea", nr=0)
         self.assertEqual(control.value, "val"+test_string)
         self.assertEqual(control.name, "name"+test_string)
 
+    def test_unescape_parsing_select(self):
+        f = StringIO("""\
+<form>
+<select name="a">
+    <option>1&amp;amp;&mdash;&#x2014;&#8212;</option>
+    <option value="2&amp;amp;&mdash;&#x2014;&#8212;">2&amp;amp;&mdash;&#x2014;&#8212;</option>
+</select>
+</form>
+""")  #"
+        forms = ClientForm.ParseFileEx(
+            f, "http://localhost/", encoding="utf-8")
+        form = forms[1]
+        test_string = "&amp;"+(u"\u2014".encode('utf8')*3)
+        control = form.find_control(nr=0)
+        for ii in range(len(control.items)):
+            item = control.items[ii]
+            self.assertEqual(item.name, str(ii+1)+test_string)
+            # XXX label
+
     def test_unescape_parsing_data(self):
         file = StringIO(
 """\
@@ -403,10 +422,7 @@
         entity_ctl = form.find_control(type="textarea", nr=2)
         self.assertEqual(entity_ctl.name, '"ta"')
         self.assertEqual(entity_ctl.attrs["id"], "foo&amp;bar")
-
-        # sgmllib gets this wrong (unescapes twice) (module HTMLParser does
-        # it right)
-        #self.assertEqual(entity_ctl.value, "Hello testers &amp; users!")
+        self.assertEqual(entity_ctl.value, "Hello testers &amp; users!")
 
     def testSelect(self):
         file = StringIO(


More information about the wwwsearch-commits mailing list