[Lxml-checkins] r44252 - in lxml/branch/html/src/lxml/html: . tests

ianb at codespeak.net ianb at codespeak.net
Fri Jun 15 00:04:12 CEST 2007


Author: ianb
Date: Fri Jun 15 00:04:11 2007
New Revision: 44252

Added:
   lxml/branch/html/src/lxml/html/formfill.py
   lxml/branch/html/src/lxml/html/tests/test_formfill.py
   lxml/branch/html/src/lxml/html/tests/test_formfill.txt
Log:
Added a form filling module; not fully tested yet

Added: lxml/branch/html/src/lxml/html/formfill.py
==============================================================================
--- (empty file)
+++ lxml/branch/html/src/lxml/html/formfill.py	Fri Jun 15 00:04:11 2007
@@ -0,0 +1,166 @@
+from lxml.etree import XPath
+from lxml.html import HTML, tostring
+
+__all__ = ['FormNotFound', 'fill_form']
+
+class FormNotFound(LookupError):
+    """
+    Raised when no form can be found
+
+    ``_find_form`` raises this when the page contains no forms, when no
+    form matches the requested id or name, or when the requested form
+    index is out of range.
+    """
+
+_form_name_xpath = XPath('descendant-or-self::form[name=$name]')
+_input_xpath = XPath('descendant-or-self::input | descendant-or-self::select | descendant-or-self::textarea')
+
+def fill_form(
+    el,
+    values,
+    form_id=None,
+    form_index=None,
+    ):
+    el = _find_form(el, form_id=form_id, form_index=form_index)
+    _fill_form(el, values)
+
+def fill_form_html(html, values, form_id=None, form_index=None):
+    if isinstance(html, basestring):
+        doc = HTML(html)
+        return_string = True
+    else:
+        doc = copy.deepcopy(html)
+        return_string = False
+    fill_form(doc, values, form_id=form_id, form_index=form_index)
+    if return_string:
+        return tostring(doc)
+    else:
+        return doc
+
+def _fill_form(el, values):
+    counts = {}
+    if hasattr(values, 'mixed'):
+        # For Paste request parameters
+        values = values.mixed()
+    inputs = _input_xpath(el)
+    for input in inputs:
+        name = input.get('name')
+        if not name:
+            continue
+        if _takes_multiple(input):
+            value = values.get(name, [])
+            if not isinstance(value, (list, tuple)):
+                value = [value]
+            _fill_multiple(input, value)
+        elif name not in values:
+            continue
+        else:
+            index = counts.get(name, 0)
+            counts[name] = index + 1
+            value = values[name]
+            if isinstance(value, (list, tuple)):
+                try:
+                    value = value[index]
+                except IndexError:
+                    continue
+            elif index > 0:
+                continue
+            _fill_single(input, value)
+
+def _takes_multiple(input):
+    if input.tag == 'select' and input.get('multiple'):
+        # FIXME: multiple="0"?
+        return True
+    type = input.get('type', '').lower()
+    if type in ('radio', 'checkbox'):
+        return True
+    return False
+
+def _fill_multiple(input, value):
+    type = input.get('type', '').lower()
+    if type == 'checkbox':
+        v = input.get('value')
+        if v is None:
+            if not value:
+                result = False
+            else:
+                result = value[0]
+                if isinstance(value, basestring):
+                    # The only valid "on" value for an unnamed checkbox is 'on'
+                    result = result == 'on'
+            _check(input, result)
+        else:
+            _check(input, v in value)
+    elif type == 'radio':
+        v = input.get('value')
+        _check(input, v in value)
+    else:
+        assert input.tag == 'select'
+        for option in input.findall('option'):
+            v = option.get('value')
+            if v is None:
+                # This seems to be the default, at least on IE
+                # FIXME: but I'm not sure
+                v = option.text_content()
+            _select(option, v in value)
+
+def _check(el, check):
+    if check:
+        el.set('checked', '')
+    else:
+        if 'checked' in el.attrib:
+            del el.attrib['checked']
+
+def _select(el, select):
+    if select:
+        el.set('selected', '')
+    else:
+        if 'selected' in el.attrib:
+            del el.attrib['selected']
+
+def _fill_single(input, value):
+    if input.tag == 'textarea':
+        input.clear()
+        input.text = value
+    else:
+        input.set('value', value)
+
+def _find_form(el, form_id=None, form_index=None):
+    if form_id is None and form_index is None:
+        forms = el.getiterator('form')
+        for form in forms:
+            return form
+        raise FormNotFound(
+            "No forms in page")
+    if form_id is not None:
+        form = el.get_element_by_id(form_id)
+        if form is not None:
+            return form
+        forms = _form_name_xpath(el, name=form_id)
+        if forms:
+            return forms[0]
+        else:
+            raise FormNotFound(
+                "No form with the name or id of %r (forms: %s)"
+                % (id, ', '.join(_find_form_ids(el))))               
+    if form_index is not None:
+        forms = el.getiterator('form')
+        try:
+            return forms[form_index]
+        except IndexError:
+            raise FormNotFound(
+                "There is no form with the index %r (%i forms found)"
+                % (form_index, len(forms)))
+
+def _find_form_ids(el):
+    forms = el.getiterator('form')
+    if not forms:
+        yield '(no forms)'
+        return
+    for index, form in enumerate(forms):
+        if form.get('id'):
+            if form.get('name'):
+                yield '%s or %s' % (form.get('id'),
+                                     form.get('name'))
+            else:
+                yield form.get('id')
+        elif form.get('name'):
+            yield form.get('name')
+        else:
+            yield '(unnamed form %s)' % index

Added: lxml/branch/html/src/lxml/html/tests/test_formfill.py
==============================================================================
--- (empty file)
+++ lxml/branch/html/src/lxml/html/tests/test_formfill.py	Fri Jun 15 00:04:11 2007
@@ -0,0 +1,7 @@
+import unittest
+from lxml.tests.common_imports import doctest
+
+def test_suite():
+    suite = unittest.TestSuite()
+    suite.addTests([doctest.DocFileSuite('test_formfill.txt')])
+    return suite

Added: lxml/branch/html/src/lxml/html/tests/test_formfill.txt
==============================================================================
--- (empty file)
+++ lxml/branch/html/src/lxml/html/tests/test_formfill.txt	Fri Jun 15 00:04:11 2007
@@ -0,0 +1,54 @@
+Some basic imports:
+
+    >>> from lxml.html import usedoctest
+    >>> from lxml.html.formfill import fill_form_html
+
+The simplest kind of filling is just filling an input with a value:
+
+    >>> print fill_form_html('''
+    ... <form><input type="text" name="foo"></form>''', dict(foo='bar'))
+    <form><input type="text" name="foo" value="bar"></form>
+    
+You can also fill multiple inputs, like:
+
+    >>> print fill_form_html('''
+    ... <form>
+    ...   <input type="text" name="foo">
+    ...   <input type="text" name="foo">
+    ... </form>''', dict(foo=['bar1', 'bar2']))
+    <form>
+      <input type="text" name="foo" value="bar1">
+      <input type="text" name="foo" value="bar2">
+    </form>
+
+Checkboxes can work either as boolean true/false, or be selected based
+on their inclusion in a set of values::
+
+    >>> print fill_form_html('''
+    ... <form>
+    ...   Would you like to be spammed?
+    ...   <input type="checkbox" name="spam_me"> <br>
+    ...   Spam you'd like to receive:<br>
+    ...   Viagra spam:
+    ...       <input type="checkbox" name="type" value="viagra"><br>
+    ...   Stock spam:
+    ...       <input type="checkbox" name="type" value="stock"><br>
+    ...   Other spam:
+    ...       <input type="checkbox" name="type" value="other"><br>
+    ...   <input type="submit" value="Spam!">
+    ... </form>''', dict(spam_me=True, type=['viagra', 'other']))
+    <form>
+      Would you like to be spammed?
+      <input type="checkbox" name="spam_me" checked> <br>
+      Spam you'd like to receive:<br>
+      Viagra spam:
+          <input type="checkbox" name="type" value="viagra" checked><br>
+      Stock spam:
+          <input type="checkbox" name="type" value="stock"><br>
+      Other spam:
+          <input type="checkbox" name="type" value="other" checked><br>
+      <input type="submit" value="Spam!">
+    </form>
+
+FIXME: I need to test more of this.  But I'm lazy and want to use the
+coverage report for some of this.


More information about the lxml-checkins mailing list