[wwwsearch-commits] r17843 - wwwsearch/ClientForm/trunk
jjlee at codespeak.net
jjlee at codespeak.net
Sat Sep 24 22:50:45 CEST 2005
Author: jjlee
Date: Sat Sep 24 22:50:43 2005
New Revision: 17843
Modified:
wwwsearch/ClientForm/trunk/ClientForm.py
wwwsearch/ClientForm/trunk/test.py
Log:
Turn ignore_ambiguity attribute into (semi-implemented, right now) backwards_compat attribute, for backwards-compatibility with ClientForm 0.1 mode; .find_control() can now raise AmbiguityError; Whitespace normalisation
Modified: wwwsearch/ClientForm/trunk/ClientForm.py
==============================================================================
--- wwwsearch/ClientForm/trunk/ClientForm.py (original)
+++ wwwsearch/ClientForm/trunk/ClientForm.py Sat Sep 24 22:50:43 2005
@@ -27,12 +27,12 @@
"""
# XXXX
-# Turn ignore_ambiguity into general backwards-compat option?
+# Move Item.__str__ back into ListControl? Don't like str(item) ATM...
+# Turn backwards_compat into general backwards-compat option?
# -ignore ambiguity
-# -label matching is strict
+# -item label matching is strict
# -turning off individual items allowed even if disabled
# Check old test suite passes!
-# _find_control &c. and ambiguous control labels
# XXX
# Add some more functional tests
@@ -801,7 +801,7 @@
ignore_errors=False, # ignored!
form_parser_class=FormParser,
request_class=urllib2.Request,
- entitydefs=None, ignore_ambiguity=True):
+ entitydefs=None, backwards_compat=True):
"""Parse HTTP response and return a list of HTMLForm instances.
The return value of urllib2.urlopen can be conveniently passed to this
@@ -818,12 +818,24 @@
urllib2.Request)
entitydefs: mapping like {'&amp;': '&', ...} containing HTML entity
definitions (a sensible default is used)
- ignore_ambiguity: boolean that determines how the form's label searches
- will be performed. If ignore_ambiguity is True, label searches that do
- not specify a nr (number or count) will always get the first match, even
- if other controls match. This is legacy behavior, and will be deprecated
- in a future release. If ignore_ambiguity is False, label searches that
- have ambiguous results will raise an AmbiguityError.
+
+ backwards_compat: boolean that determines whether the returned HTMLForm
+ objects are backwards-compatible with old code. If backwards_compat is True:
+
+ - ClientForm 0.1 code will continue to work as before.
+
+ - Label searches that do not specify a nr (number or count) will always
+ get the first match, even if other controls match. If
+ backwards_compat is False, label searches that have ambiguous results
+ will raise an AmbiguityError.
+
+ - Item label matching is done by strict string comparison rather than
+ substring matching.
+
+ - De-selecting individual list items is allowed even if the Item is
+ disabled.
+
+ The backwards_compat argument will be deprecated in a future release.
Pass a true value for select_default if you want the behaviour specified by
RFC 1866 (the HTML 2.0 standard), which is to select the first item in a
@@ -847,13 +859,13 @@
False,
form_parser_class,
request_class,
- entitydefs, ignore_ambiguity)
+ entitydefs, backwards_compat)
def ParseFile(file, base_uri, select_default=False,
ignore_errors=False, # ignored!
form_parser_class=FormParser,
request_class=urllib2.Request,
- entitydefs=None, ignore_ambiguity=True):
+ entitydefs=None, backwards_compat=True):
"""Parse HTML and return a list of HTMLForm instances.
ClientForm.ParseError is raised on parse errors.
@@ -902,7 +914,7 @@
# would be nice to make HTMLForm class (form builder) pluggable
form = HTMLForm(
action, method, enctype, name, attrs, request_class,
- forms, labels, id_to_labels, ignore_ambiguity)
+ forms, labels, id_to_labels, backwards_compat)
for type, name, attrs in controls:
attrs = fp.unescape_attrs_if_required(attrs)
name = fp.unescape_attr_if_required(name)
@@ -912,12 +924,16 @@
form.fixup()
return forms
+
class Label:
def __init__(self, attrs):
self.id = attrs.get("for")
self.text = compress_text(attrs.get("__text"))
self.attrs = attrs
+ def __str__(self):
+ return '<Label(id=%r, text=%r)>' % (self.id, self.text)
+
def _getLabel(attrs):
label = attrs.get("__label")
if label is not None:
@@ -1536,7 +1552,7 @@
def items_from_label(self, label, exclude_disabled=False):
"""Return all items that have labels containing the given label text.
-
+
Optionally excludes disabled items.
"""
@@ -1553,7 +1569,7 @@
def items_from_name(self, name, exclude_disabled=False):
"""Return all items that have names matching the given name.
-
+
Optionally excludes disabled items.
"""
@@ -1568,7 +1584,7 @@
nr is an optional 0-based index of the items matching the query.
If nr is the default None value and more than item is found, raises
- AmbiguityError (unless the HTMLForm instance's ignore_ambiguity
+ AmbiguityError (unless the HTMLForm instance's backwards_compat
attribute is true).
If no item is found, raises ItemNotFoundError.
@@ -1583,7 +1599,7 @@
method = self.items_from_label
else:
method = self.items_from_name
- if nr is None and self._form.ignore_ambiguity:
+ if nr is None and self._form.backwards_compat:
nr = 0 # :-/
return disambiguate(method(name, exclude_disabled), nr, name)
@@ -1627,6 +1643,7 @@
if self.readonly:
raise AttributeError("control '%s' is readonly" % self.name)
action == bool(action)
+ #compat = self._form.backwards_compat
if item.disabled:
raise AttributeError("item is disabled")
elif action != item.selected:
@@ -1854,7 +1871,7 @@
value is expected to be an iterable of strings that are substrings of
the item labels that should be selected. Ambiguous labels are accepted
- without complaint if the form's ignore_ambiguity is True; otherwise,
+ without complaint if the form's backwards_compat is True; otherwise,
it will not complain as long as all ambiguous labels share the same
item name (e.g. OPTION value).
@@ -1869,7 +1886,7 @@
for nn in value:
found = self.items_from_label(nn)
if len(found) > 1:
- if not self._form.ignore_ambiguity:
+ if not self._form.backwards_compat:
# ambiguous labels are fine as long as item names (e.g.
# OPTION values) are same
opt_name = found[0].name
@@ -1881,7 +1898,7 @@
for o in found:
# For the multiple-item case, we could try to be smarter,
# saving them up and trying to resolve, but that's too much.
- if self._form.ignore_ambiguity or o not in items:
+ if self._form.backwards_compat or o not in items:
items.append(o)
break
else: # all of them are used
@@ -2429,7 +2446,7 @@
name=None, attrs=None,
request_class=urllib2.Request,
forms=None, labels=None, id_to_labels=None,
- ignore_ambiguity=True):
+ backwards_compat=True):
"""
In the usual case, use ParseResponse (or ParseFile) to create new
HTMLForm objects.
@@ -2454,7 +2471,7 @@
self._forms = forms # this is a semi-public API!
self._labels = labels # this is a semi-public API!
self._id_to_labels = id_to_labels # this is a semi-public API!
- self.ignore_ambiguity = ignore_ambiguity
+ self.backwards_compat = backwards_compat
def new_control(self, type, name, attrs,
ignore_unknown=False, select_default=False):
@@ -2819,8 +2836,6 @@
(nr is None)):
raise ValueError(
"at least one argument must be supplied to specify control")
- if nr is None: nr = 0
-
return self._find_control(name, type, kind, id, label, predicate, nr)
#---------------------------------------------------
@@ -2851,10 +2866,14 @@
raise TypeError("control label must be string-like")
if (predicate is not None) and not callable(predicate):
raise TypeError("control predicate must be callable")
- if nr < 0: raise ValueError("control number must be a positive "
- "integer")
+ if (nr is not None) and nr < 0:
+ raise ValueError("control number must be a positive integer")
orig_nr = nr
+ found = None
+ ambiguous = False
+ if nr is None and self.backwards_compat:
+ nr = 0
for control in self.controls:
if name is not None and name != control.name:
@@ -2873,10 +2892,18 @@
break
else:
continue
- if nr:
- nr = nr - 1
+ if nr is not None:
+ if nr == 0:
+ return control # early exit: unambiguous due to nr
+ nr -= 1
continue
- return control
+ if found:
+ ambiguous = True
+ break
+ found = control
+
+ if found and not ambiguous:
+ return found
description = []
if name is not None: description.append("name '%s'" % name)
@@ -2888,7 +2915,12 @@
description.append("predicate %s" % predicate)
if orig_nr: description.append("nr %d" % orig_nr)
description = ", ".join(description)
- raise ControlNotFoundError("no control matching "+description)
+
+ if ambiguous:
+ raise AmbiguityError("more than one control matching "+description)
+ elif not found:
+ raise ControlNotFoundError("no control matching "+description)
+ assert False
def _click(self, name, type, id, label, nr, coord, return_type,
request_class=urllib2.Request):
Modified: wwwsearch/ClientForm/trunk/test.py
==============================================================================
--- wwwsearch/ClientForm/trunk/test.py (original)
+++ wwwsearch/ClientForm/trunk/test.py Sat Sep 24 22:50:43 2005
@@ -12,7 +12,7 @@
import ClientForm
from ClientForm import ControlNotFoundError, ItemNotFoundError, \
- ItemCountError, ParseError
+ ItemCountError, AmbiguityError, ParseError
# XXX
# Base control tests on ParseFile, so can use same tests for DOMForm and
@@ -63,7 +63,7 @@
self._forms = []
self._labels = []
self._id_to_labels = {}
- self.ignore_ambiguity = True
+ self.backwards_compat = True
self.controls = []
def find_control(self, name, type):
@@ -1743,6 +1743,105 @@
class FormTests(TestCase):
base_uri = "http://auth.athensams.net/"
+ def test_find_control(self):
+ f = StringIO("""\
+<form>
+ <label for="form.title"> Book Title </label></td>
+ <input type="text" id="form.title" name="form.title"
+ value="The Grapes of Wrath" />
+
+ <label for="form.quality">Book Quality</label></td>
+ <select id="form.quality" name="form.country">
+ <option>Good</option>
+ <option>Bad</option>
+ </select>
+
+ <label><input type="checkbox" id="form.genre.western" name="form.genre"
+ value="western" /> Western</label>
+ <label><input type="checkbox" id="form.genre.horror" name="form.genre"
+ value="horror" /> Horror</label>
+
+ <label for="form.password">Password</label>
+ <input type="password" id="pswd1" name="password" value="123" />
+ <input type="password" id="pswd2" name="password" value="123" />
+</form>
+""")
+ form = ClientForm.ParseFile(f, "http://example.com/")[0]
+ for compat in True, False:
+ form.backwards_compat = compat
+ fc = form.find_control
+
+ self.assertEqual(fc("form.title").id, "form.title")
+ self.assertEqual(fc("form.title", nr=0).id, "form.title")
+ if compat:
+ self.assertEqual(fc("password").id, "pswd1")
+ else:
+ self.assertRaises(AmbiguityError, fc, "password")
+ self.assertEqual(fc("password", id="pswd2").id, "pswd2")
+ self.assertRaises(ControlNotFoundError, fc, "form.title", nr=1)
+ self.assertRaises(ControlNotFoundError, fc, nr=50)
+ self.assertRaises(ValueError, fc, nr=-1)
+ self.assertRaises(ControlNotFoundError, fc, label="Bananas")
+
+ # label
+ self.assertEqual(fc(label="Title").id, "form.title")
+ self.assertEqual(fc(label="Book Title").id, "form.title")
+ self.assertRaises(ControlNotFoundError, fc, label=" Book Title ")
+ self.assertRaises(ControlNotFoundError, fc, label="Bananas")
+ self.assertRaises(ControlNotFoundError, fc, label="title")
+
+ self.assertEqual(fc(label="Book", nr=0).id, "form.title")
+ self.assertEqual(fc(label="Book", nr=1).id, "form.quality")
+ if compat:
+ self.assertEqual(fc(label="Book").id, "form.title")
+ else:
+ self.assertRaises(AmbiguityError, fc, label="Book")
+
+ def test_deselect_disabled(self):
+ f = StringIO("""\
+<form>
+ <input type="checkbox" name="p" value="a" disabled checked></input>
+ <input type="checkbox" name="p" value="b"></input>
+ <input type="checkbox" name="p" value="c"></input>
+</form>
+""")
+ def new_form():
+ f.seek(0)
+ form = ClientForm.ParseFile(f, "http://example.com/")[0]
+ form.backwards_compat = compat
+ ctl = form.find_control("p")
+ a = ctl.get("a")
+ return form, ctl, a
+ for compat in [False]:#True, False:
+ form, ctl, a = new_form()
+ ctl.value = ["b"]
+
+ # :-((
+ if compat:
+ # rationale: allowed to deselect, but not select, disabled
+ # items
+ form, ctl, a = new_form()
+ self.assertRaises(AttributeError, setattr, a, "selected", True)
+ self.assertRaises(AttributeError, setattr, ctl, "value", ["a"])
+ a.selected = False
+ form, ctl, a = new_form()
+ ctl.value = ["b"]
+ else:
+ # rationale: Setting an individual item's selected state to its
+ # present value is a no-op, as is setting the whole control
+ # value where an item name doesn't appear in the new value, but
+ # that item is disabled anyway. However, if the item's state
+ # does change, both selecting and deselecting are disallowed
+ # for disabled items.
+ form, ctl, a = new_form()
+ a.selected = True
+ form, ctl, a = new_form()
+ #ctl.value = ["a"]
+ self.assertRaises(AttributeError, setattr, ctl, "value", ["a"])
+ form, ctl, a = new_form()
+ self.assertRaises(AttributeError, setattr, a, "selected", False)
+ ctl.value = ["b"]
+
def test_click(self):
file = StringIO(
"""<form action="abc" name="myform">
@@ -1978,7 +2077,7 @@
def testSetValueByLabelIgnoringAmbiguity(self):
# regression test: follow ClientForm 0.1 behaviour
- # also test that ignore_ambiguity argument to ParseFile works
+ # also test that backwards_compat argument to ParseFile works
f = StringIO("""\
<form>
<select multiple name="form.grocery">
@@ -1989,9 +2088,9 @@
<input type="submit" value="Submit" />
</form>
""")
- for kwds, ignore_ambiguity in [({}, True),
- ({"ignore_ambiguity": True}, True),
- ({"ignore_ambiguity": False}, False),
+ for kwds, backwards_compat in [({}, True),
+ ({"backwards_compat": True}, True),
+ ({"backwards_compat": False}, False),
]:
form = ClientForm.ParseFile(f, "http://localhost/", **kwds)[0]
f.seek(0)
@@ -2000,7 +2099,7 @@
# print [label.text for label in item.get_labels()]
c.set_value_by_label(
["Loaf of Bread", "Loaf of Bread", "Loaf of Challah"])
- if ignore_ambiguity:
+ if backwards_compat:
# select first item of ambiguous set
self.assertEqual(
c.get_value_by_label(),
@@ -2128,7 +2227,7 @@
# test item ambiguity, get, items_from_label, items_from_name, and
# set_value_by_label
# a form can be in two states: either ignoring ambiguity or being
- # careful about it. Currently, by default, a form's ignore_ambiguity
+ # careful about it. Currently, by default, a form's backwards_compat
# attribute is True, so ambiguity is ignored. For instance, notice
# that the form.grocery checkboxes include some loaves of bread and
# a loaf of challah. The code just guesses what you mean:
@@ -2137,10 +2236,10 @@
c.set_value_by_label(["Loaf"])
self.assertEqual(c.get_value_by_label(), ["Loaf of Bread"])
self.assertEqual(c.items[0].id, "1")
- # However, if the form's ignore_ambiguity attribute is False, Ambiguity
+ # However, if the form's backwards_compat attribute is False, Ambiguity
# Errors may be raised. This is generally a preferred approach, but is
# not backwards compatible.
- form.ignore_ambiguity = False
+ form.backwards_compat = False
self.assertRaises(ClientForm.AmbiguityError, c.get, "Loaf", True)
self.assertRaises(
ClientForm.AmbiguityError, c.set_value_by_label, ["Loaf"])
@@ -2453,7 +2552,7 @@
</form>
""")
form = ClientForm.ParseFile(f, "http://example.com/",
- ignore_ambiguity=True)[0]
+ backwards_compat=True)[0]
ctl = form.find_control("form.grocery")
# ordinary case
self.assertEqual(ctl.get("p", nr=1).id, "3")
More information about the wwwsearch-commits
mailing list