[Lxml-checkins] r44252 - in lxml/branch/html/src/lxml/html: . tests
ianb at codespeak.net
ianb at codespeak.net
Fri Jun 15 00:04:12 CEST 2007
Author: ianb
Date: Fri Jun 15 00:04:11 2007
New Revision: 44252
Added:
lxml/branch/html/src/lxml/html/formfill.py
lxml/branch/html/src/lxml/html/tests/test_formfill.py
lxml/branch/html/src/lxml/html/tests/test_formfill.txt
Log:
Added a form filling module; not fully tested yet
Added: lxml/branch/html/src/lxml/html/formfill.py
==============================================================================
--- (empty file)
+++ lxml/branch/html/src/lxml/html/formfill.py Fri Jun 15 00:04:11 2007
@@ -0,0 +1,166 @@
+import copy
+
+from lxml.etree import XPath
+from lxml.html import HTML, tostring
+
+__all__ = ['FormNotFound', 'fill_form']
+
+class FormNotFound(LookupError):
+ """
+ Raised when no form can be found
+ """
+
+_form_name_xpath = XPath('descendant-or-self::form[name=$name]')
+_input_xpath = XPath('descendant-or-self::input | descendant-or-self::select | descendant-or-self::textarea')
+
+def fill_form(
+ el,
+ values,
+ form_id=None,
+ form_index=None,
+ ):
+ el = _find_form(el, form_id=form_id, form_index=form_index)
+ _fill_form(el, values)
+
+def fill_form_html(html, values, form_id=None, form_index=None):
+ if isinstance(html, basestring):
+ doc = HTML(html)
+ return_string = True
+ else:
+ doc = copy.deepcopy(html)
+ return_string = False
+ fill_form(doc, values, form_id=form_id, form_index=form_index)
+ if return_string:
+ return tostring(doc)
+ else:
+ return doc
+
+def _fill_form(el, values):
+ counts = {}
+ if hasattr(values, 'mixed'):
+ # For Paste request parameters
+ values = values.mixed()
+ inputs = _input_xpath(el)
+ for input in inputs:
+ name = input.get('name')
+ if not name:
+ continue
+ if _takes_multiple(input):
+ value = values.get(name, [])
+ if not isinstance(value, (list, tuple)):
+ value = [value]
+ _fill_multiple(input, value)
+ elif name not in values:
+ continue
+ else:
+ index = counts.get(name, 0)
+ counts[name] = index + 1
+ value = values[name]
+ if isinstance(value, (list, tuple)):
+ try:
+ value = value[index]
+ except IndexError:
+ continue
+ elif index > 0:
+ continue
+ _fill_single(input, value)
+
+def _takes_multiple(input):
+ if input.tag == 'select' and input.get('multiple'):
+ # FIXME: multiple="0"?
+ return True
+ type = input.get('type', '').lower()
+ if type in ('radio', 'checkbox'):
+ return True
+ return False
+
+def _fill_multiple(input, value):
+ type = input.get('type', '').lower()
+ if type == 'checkbox':
+ v = input.get('value')
+ if v is None:
+ if not value:
+ result = False
+ else:
+ result = value[0]
+ if isinstance(value, basestring):
+ # The only valid "on" value for an unnamed checkbox is 'on'
+ result = result == 'on'
+ _check(input, result)
+ else:
+ _check(input, v in value)
+ elif type == 'radio':
+ v = input.get('value')
+ _check(input, v in value)
+ else:
+ assert input.tag == 'select'
+ for option in input.findall('option'):
+ v = option.get('value')
+ if v is None:
+ # This seems to be the default, at least on IE
+ # FIXME: but I'm not sure
+ v = option.text_content()
+ _select(option, v in value)
+
+def _check(el, check):
+ if check:
+ el.set('checked', '')
+ else:
+ if 'checked' in el.attrib:
+ del el.attrib['checked']
+
+def _select(el, select):
+ if select:
+ el.set('selected', '')
+ else:
+ if 'selected' in el.attrib:
+ del el.attrib['selected']
+
+def _fill_single(input, value):
+ if input.tag == 'textarea':
+ input.clear()
+ input.text = value
+ else:
+ input.set('value', value)
+
+def _find_form(el, form_id=None, form_index=None):
+ if form_id is None and form_index is None:
+ forms = el.getiterator('form')
+ for form in forms:
+ return form
+ raise FormNotFound(
+ "No forms in page")
+ if form_id is not None:
+ form = el.get_element_by_id(form_id)
+ if form is not None:
+ return form
+ forms = _form_name_xpath(el, name=form_id)
+ if forms:
+ return forms[0]
+ else:
+ raise FormNotFound(
+ "No form with the name or id of %r (forms: %s)"
+ % (id, ', '.join(_find_form_ids(el))))
+ if form_index is not None:
+ forms = el.getiterator('form')
+ try:
+ return forms[form_index]
+ except IndexError:
+ raise FormNotFound(
+ "There is no form with the index %r (%i forms found)"
+ % (form_index, len(forms)))
+
+def _find_form_ids(el):
+ forms = el.getiterator('form')
+ if not forms:
+ yield '(no forms)'
+ return
+ for index, form in enumerate(forms):
+ if form.get('id'):
+ if form.get('name'):
+ yield '%s or %s' % (form.get('id'),
+ form.get('name'))
+ else:
+ yield form.get('id')
+ elif form.get('name'):
+ yield form.get('name')
+ else:
+ yield '(unnamed form %s)' % index
Added: lxml/branch/html/src/lxml/html/tests/test_formfill.py
==============================================================================
--- (empty file)
+++ lxml/branch/html/src/lxml/html/tests/test_formfill.py Fri Jun 15 00:04:11 2007
@@ -0,0 +1,7 @@
+import unittest
+from lxml.tests.common_imports import doctest
+
+def test_suite():
+ suite = unittest.TestSuite()
+ suite.addTests([doctest.DocFileSuite('test_formfill.txt')])
+ return suite
Added: lxml/branch/html/src/lxml/html/tests/test_formfill.txt
==============================================================================
--- (empty file)
+++ lxml/branch/html/src/lxml/html/tests/test_formfill.txt Fri Jun 15 00:04:11 2007
@@ -0,0 +1,54 @@
+Some basic imports:
+
+ >>> from lxml.html import usedoctest
+ >>> from lxml.html.formfill import fill_form_html
+
+The simplest kind of filling is just filling an input with a value:
+
+ >>> print fill_form_html('''
+ ... <form><input type="text" name="foo"></form>''', dict(foo='bar'))
+ <form><input type="text" name="foo" value="bar"></form>
+
+You can also fill multiple inputs, like:
+
+ >>> print fill_form_html('''
+ ... <form>
+ ... <input type="text" name="foo">
+ ... <input type="text" name="foo">
+ ... </form>''', dict(foo=['bar1', 'bar2']))
+ <form>
+ <input type="text" name="foo" value="bar1">
+ <input type="text" name="foo" value="bar2">
+ </form>
+
+Checkboxes can work either as boolean true/false, or be selected based
+on their inclusion in a set of values::
+
+ >>> print fill_form_html('''
+ ... <form>
+ ... Would you like to be spammed?
+ ... <input type="checkbox" name="spam_me"> <br>
+ ... Spam you'd like to receive:<br>
+ ... Viagra spam:
+ ... <input type="checkbox" name="type" value="viagra"><br>
+ ... Stock spam:
+ ... <input type="checkbox" name="type" value="stock"><br>
+ ... Other spam:
+ ... <input type="checkbox" name="type" value="other"><br>
+ ... <input type="submit" value="Spam!">
+ ... </form>''', dict(spam_me=True, type=['viagra', 'other']))
+ <form>
+ Would you like to be spammed?
+ <input type="checkbox" name="spam_me" checked> <br>
+ Spam you'd like to receive:<br>
+ Viagra spam:
+ <input type="checkbox" name="type" value="viagra" checked><br>
+ Stock spam:
+ <input type="checkbox" name="type" value="stock"><br>
+ Other spam:
+ <input type="checkbox" name="type" value="other" checked><br>
+ <input type="submit" value="Spam!">
+ </form>
+
+FIXME: I need to test more of this. But I'm lazy and want to use the
+coverage report for some of this.
More information about the lxml-checkins
mailing list