[Lxml-checkins] r45121 - in lxml/branch/html/src/lxml/html: . tests
ianb at codespeak.net
ianb at codespeak.net
Mon Jul 16 08:13:11 CEST 2007
Author: ianb
Date: Mon Jul 16 08:13:09 2007
New Revision: 45121
Added:
lxml/branch/html/src/lxml/html/setmixin.py
lxml/branch/html/src/lxml/html/tests/test_forms.py
lxml/branch/html/src/lxml/html/tests/test_forms.txt
Modified:
lxml/branch/html/src/lxml/html/__init__.py
Log:
Add special handling for form and input elements:
* New classes for form, input, select, textarea, and label elements
* Any element can query for its label, labels know what they point to
* input elements know their name and value
* form elements have pointers to their input elements
Also an accessor for head and body and the page's forms.
Also a debugging function, to open a document in a web browser.
Modified: lxml/branch/html/src/lxml/html/__init__.py
==============================================================================
--- lxml/branch/html/src/lxml/html/__init__.py (original)
+++ lxml/branch/html/src/lxml/html/__init__.py Mon Jul 16 08:13:09 2007
@@ -5,10 +5,11 @@
from lxml import etree
from lxml.html import defs
from lxml import cssselect
+from lxml.html.setmixin import SetMixin
__all__ = ['document_fromstring', 'tostring', 'Element', 'defs',
'find_rel_links', 'find_class', 'make_links_absolute',
- 'resolve_base_href', 'iterlinks', 'rewrite_links']
+ 'resolve_base_href', 'iterlinks', 'rewrite_links', 'open_in_browser']
_rel_links_xpath = etree.XPath("descendant-or-self::a[@rel]")
#_class_xpath = etree.XPath(r"descendant-or-self::*[regexp:match(@class, concat('\b', $class_name, '\b'))]", {'regexp': 'http://exslt.org/regular-expressions'})
@@ -17,9 +18,62 @@
_collect_string_content = etree.XPath("string()")
_css_url_re = re.compile(r'url\((.*?)\)', re.I)
_css_import_re = re.compile(r'@import "(.*?)"')
+_label_xpath = etree.XPath("//label[@for=$id]")
class HtmlMixin(object):
+ def forms(self):
+ """
+ Return a list of all the forms
+ """
+ return list(self.getiterator('form'))
+ forms = property(forms, doc=forms.__doc__)
+
+ def body(self):
+ """
+ Return the <body> element. Can be called from a child element
+ to get the document's body.
+ """
+ return self.xpath('//body')[0]
+ body = property(body, doc=body.__doc__)
+
+ def head(self):
+ """
+ Returns the <head> element. Can be called from a child
+ element to get the document's head.
+ """
+ return self.xpath('//head')[0]
+ head = property(head, doc=head.__doc__)
+
+ def label__get(self):
+ """
+ Get or set any <label> element associated with this element.
+ """
+ id = self.get('id')
+ if not id:
+ return None
+ result = _label_xpath(self, id=id)
+ if not result:
+ return None
+ else:
+ return result[0]
+ def label__set(self, label):
+ id = self.get('id')
+ if not id:
+ raise TypeError(
+ "You cannot set a label for an element (%r) that has no id"
+ % self)
+ if not label.tag == 'label':
+ raise TypeError(
+ "You can only assign label to a label element (not %r)"
+ % label)
+ label.set('for', id)
+ def label__del(self):
+ label = self.label
+ if label is not None:
+ del label.attrib['for']
+ label = property(label__get, label__set, label__del, doc=label__get.__doc__)
+
def drop_tree(self):
"""
Removes this element from the tree, including its children and
@@ -232,12 +286,6 @@
works in-place (and so returns None) it returns a serialized form
of the resulting document.
"""
- # FIXME: the None test is a bit sloppy FIXME: this is basically
- # functional if you use it with a string; should it be a
- # functional equivalent for working with elements too? It has to
- # make a copy of the document. The problem is it changes the
- # return type, as it should return the copied document and not a
- # serialization. Is that odd?
def __init__(self, name, copy=False, source_class=HtmlMixin):
self.name = name
self.copy = copy
@@ -259,6 +307,7 @@
doc = copy.deepcopy(doc)
meth = getattr(doc, self.name)
result = meth(*args, **kw)
+ # FIXME: this None test is a bit sloppy
if result is None:
# Then return what we got in
if return_string:
@@ -287,13 +336,27 @@
class HtmlEntity(etree.EntityBase, HtmlMixin):
pass
+
+class HtmlLookup(etree.CustomElementClassLookup):
+ _elements = {}
+
+ def lookup(self, node_type, document, namespace, name):
+ if node_type == 'element':
+ return self._elements.get(name, HtmlElement)
+ elif node_type == 'comment':
+ return HtmlComment
+ elif node_type == 'PI':
+ return HtmlProcessingInstruction
+ elif node_type == 'entity':
+ return HtmlEntity
+ # Otherwise normal lookup
+ return None
+
+
html_parser = etree.HTMLParser()
-html_parser.setElementClassLookup(etree.ElementDefaultClassLookup(
- element=HtmlElement, comment=HtmlComment,
- pi=HtmlProcessingInstruction, entity=HtmlEntity))
+html_parser.setElementClassLookup(HtmlLookup())
def document_fromstring(html):
- # FIXME: should this notice a fragment and parse accordingly?
value = etree.HTML(html, html_parser)
if value is None:
raise etree.ParserError(
@@ -312,7 +375,6 @@
# FIXME: check what happens when you give html with a body, head, etc.
start = html[:20].lstrip().lower()
if not start.startswith('<html') and not start.startswith('<!doctype'):
- # FIXME: That test doesn't work with a doctype or PI
html = '<html><body>%s</body></html>' % html
doc = document_fromstring(html)
assert doc.tag == 'html'
@@ -414,6 +476,12 @@
body.tag = 'span'
return body
+def parse(filename):
+ """
+ Parse a filename, URL, or file-like object into an HTML document.
+ """
+ return etree.parse(filename, html_parser)
+
def _contains_block_level_tag(el):
# FIXME: I could do this with XPath, but would that just be
# unnecessarily slow?
@@ -434,6 +502,590 @@
v = html_parser.makeelement(*args, **kw)
return v
+class FormElement(HtmlElement):
+ """
+ Represents a <form> element.
+ """
+
+ def inputs(self):
+ """
+ Returns an accessor for all the input elements in the form.
+
+ See `InputGetter` for more information about the object.
+ """
+ return InputGetter(self)
+ inputs = property(inputs, doc=inputs.__doc__)
+
+    def _name(self):
+        """
+        Return a short label for this form: its ``name`` attribute if
+        set, else ``'#'`` plus its ``id``, else its position (as a
+        string) among the ``<form>`` elements under the document body.
+        """
+        if self.get('name'):
+            return self.get('name')
+        elif self.get('id'):
+            return '#' + self.get('id')
+        # Walk every descendant of <body>: findall('form') only matched
+        # direct children, so index() raised ValueError for a form that
+        # is nested inside another element.
+        forms = list(self.body.getiterator('form'))
+        return str(forms.index(self))
+
+ def form_values(self):
+ """
+ Return a list of tuples of the field values for the form.
+ This is suitable to be passed to ``urllib.urlencode()``.
+ """
+ results = []
+ for el in self.inputs:
+ name = el.name
+ if not name:
+ continue
+ if el.tag == 'textarea':
+ results.append((name, el.value))
+ elif el.tag == 'select':
+ value = el.value
+ if el.multiple:
+ for v in value:
+ results.append((name, v))
+ elif value is not None:
+ results.append((name, el.value))
+ else:
+ assert el.tag == 'input', (
+ "Unexpected tag: %r" % el)
+ if el.checkable and not el.checked:
+ continue
+ if el.type in ('submit', 'image', 'reset'):
+ continue
+ value = el.value
+ if value is not None:
+ results.append((name, el.value))
+ return results
+
+ def action__get(self):
+ """
+ Get/set the form's ``action`` attribute.
+ """
+ return self.get('action')
+ def action__set(self, value):
+ self.set('action', value)
+ def action__del(self):
+ if 'action' in self.attrib:
+ del self.attrib['action']
+ action = property(action__get, action__set, action__del, doc=action__get.__doc__)
+
+ def method__get(self):
+ """
+ Get/set the form's method. Always returns a capitalized
+ string, and defaults to ``'GET'``
+ """
+ return self.get('method', 'GET').upper()
+ def method__set(self, value):
+ self.set('method', value.upper())
+ method = property(method__get, method__set, doc=method__get.__doc__)
+
+ def submit(self, extra_values=None):
+ """
+ Submit the form. Returns a file-like object, from
+ ``urllib.urlopen()``. This object also has a ``.geturl()`` function,
+ which shows the URL if there were any redirects.
+
+ You can use this like::
+
+ >>> form = doc.forms[0]
+ >>> form.inputs['foo'].value = 'bar' # etc
+ >>> response = form.submit()
+ >>> doc = parse(response)
+ >>> doc.make_links_absolute(response.geturl())
+ """
+ values = self.form_values()
+ if extra_values:
+ if hasattr(extra_values, 'items'):
+ extra_values = extra_values.items()
+ values.extend(extra_values)
+ import urllib
+ action = self.action
+ ## FIXME: should test that it's not a relative URL or something
+ if self.method == 'GET':
+ if '?' in action:
+ action += '&'
+ else:
+ action += '?'
+ action += urllib.urlencode(values)
+ data = None
+ else:
+ data = urllib.urlencode(values)
+ return urllib.urlopen(action, data)
+
+HtmlLookup._elements['form'] = FormElement
+
+class InputGetter(object):
+
+ """
+ An accessor that represents all the input fields in a form.
+
+ You can get fields by name from this, with
+ ``form.inputs['field_name']``. If there are a set of checkboxes
+ with the same name, they are returned as a list (a `CheckboxGroup`
+ which also allows value setting). Radio inputs are handled
+ similarly.
+
+ You can also iterate over this to get all input elements. This
+ won't return the same thing as if you get all the names, as
+ checkboxes and radio elements are returned individually.
+ """
+
+ _name_xpath = etree.XPath(".//*[@name = $name and (name(.) = 'select' or name(.) = 'input' or name(.) = 'textarea')]")
+ _all_xpath = etree.XPath(".//*[name() = 'select' or name() = 'input' or name() = 'textarea']")
+
+ def __init__(self, form):
+ self.form = form
+
+ def __repr__(self):
+ return '<%s for form %s>' % (
+ self.__class__.__name__,
+ self.form._name())
+
+ ## FIXME: there should be more methods, and it's unclear if this is
+ ## a dictionary-like object or list-like object
+
+ def __getitem__(self, name):
+ results = self._name_xpath(self.form, name=name)
+ if results:
+ type = results[0].get('type')
+ if type == 'radio' and len(results) > 1:
+ group = RadioGroup(results)
+ group.name = name
+ return group
+ elif type == 'checkbox' and len(results) > 1:
+ group = CheckboxGroup(results)
+ group.name = name
+ return group
+ else:
+ # I don't like throwing away elements like this
+ return results[0]
+ else:
+ raise KeyError(
+ "No input element with the name %r" % name)
+
+ def __iter__(self):
+ ## FIXME: kind of dumb to turn a list into an iterator, only
+ ## to have it likely turned back into a list again :(
+ return iter(self._all_xpath(self.form))
+
+class InputMixin(object):
+
+ """
+ Mix-in for all input elements (input, select, and textarea)
+ """
+
+
+ def name__get(self):
+ """
+ Get/set the name of the element
+ """
+ return self.get('name')
+ def name__set(self, value):
+ self.set('name', value)
+ def name__del(self):
+ if 'name' in self.attrib:
+ del self.attrib['name']
+ name = property(name__get, name__set, name__del, doc=name__get.__doc__)
+
+ def __repr__(self):
+ type = getattr(self, 'type', None)
+ if type:
+ type = ' type=%r' % type
+ else:
+ type = ''
+ return '<%s %x name=%r%s>' % (
+ self.__class__.__name__, id(self), self.name, type)
+
+class TextareaElement(InputMixin, HtmlElement):
+ """
+ ``<textarea>`` element. You can get the name with ``.name`` and
+ get/set the value with ``.value``
+ """
+
+ def value__get(self):
+ """
+ Get/set the value (which is the contents of this element)
+ """
+ return self.text or ''
+ def value__set(self, value):
+ self.text = value
+ def value__del(self):
+ self.text = ''
+ value = property(value__get, value__set, value__del, doc=value__get.__doc__)
+
+HtmlLookup._elements['textarea'] = TextareaElement
+
+class SelectElement(InputMixin, HtmlElement):
+ """
+ ``<select>`` element. You can get the name with ``.name``.
+
+ ``.value`` will be the value of the selected option, unless this
+ is a multi-select element (``<select multiple>``), in which case
+ it will be a set-like object. In either case ``.value_options``
+ gives the possible values.
+
+ The boolean attribute ``.multiple`` shows if this is a
+ multi-select.
+ """
+
+ def value__get(self):
+ """
+ Get/set the value of this select (the selected option).
+
+ If this is a multi-select, this is a set-like object that
+ represents all the selected options.
+ """
+ if self.multiple:
+ return MultipleSelectOptions(self)
+ for el in self.getiterator('option'):
+ if 'selected' in el.attrib:
+ value = el.get('value')
+ # FIXME: If value is None, what to return?, get_text()?
+ return value
+ return None
+
+ def value__set(self, value):
+ if self.multiple:
+ if isinstance(value, basestring):
+ raise TypeError(
+ "You must pass in a sequence")
+ self.value.clear()
+ self.value.update(value)
+ return
+ if value is not None:
+ for el in self.getiterator('option'):
+ # FIXME: also if el.get('value') is None?
+ if el.get('value') == value:
+ checked_option = el
+ break
+ else:
+ raise ValueError(
+ "There is no option with the value of %r" % value)
+ for el in self.getiterator('option'):
+ if 'selected' in el.attrib:
+ del el.attrib['selected']
+ if value is not None:
+ checked_option.set('selected', '')
+
+ def value__del(self):
+ # FIXME: should del be allowed at all?
+ if self.multiple:
+ self.value.clear()
+ else:
+ self.value = None
+
+ value = property(value__get, value__set, value__del, doc=value__get.__doc__)
+
+ def value_options(self):
+ """
+ All the possible values this select can have (the ``value``
+ attribute of all the ``<option>`` elements).
+ """
+ return [el.get('value') for el in self.getiterator('option')]
+ value_options = property(value_options, doc=value_options.__doc__)
+
+ def multiple__get(self):
+ """
+ Boolean attribute: is there a ``multiple`` attribute on this element.
+ """
+ return 'multiple' in self.attrib
+ def multiple__set(self, value):
+ if value:
+ self.set('multiple', '')
+ elif 'multiple' in self.attrib:
+ del self.attrib['multiple']
+ multiple = property(multiple__get, multiple__set, doc=multiple__get.__doc__)
+
+HtmlLookup._elements['select'] = SelectElement
+
+class MultipleSelectOptions(SetMixin):
+    """
+    Represents all the selected options in a ``<select multiple>`` element.
+
+    You can add to this set-like object to select an option, or remove
+    to unselect the option.
+    """
+
+    def __init__(self, select):
+        # The <select> element whose <option> children are manipulated.
+        self.select = select
+
+    def options(self):
+        """
+        Iterator of all the ``<option>`` elements.
+        """
+        return self.select.getiterator('option')
+    options = property(options)
+
+    def __iter__(self):
+        # NOTE(review): this yields the value of every <option>, whether
+        # or not it carries ``selected``, although the class is described
+        # as the *selected* options -- confirm whether a filter on
+        # ``'selected' in option.attrib`` is intended here.
+        for option in self.options:
+            yield option.get('value')
+
+    def add(self, item):
+        """
+        Select the option whose ``value`` equals ``item``.
+
+        Raises ValueError if no option has that value.
+        """
+        for option in self.options:
+            if option.get('value') == item:
+                option.set('selected', '')
+                break
+        else:
+            raise ValueError(
+                "There is no option with the value %r" % item)
+
+    def remove(self, item):
+        """
+        Unselect the option whose ``value`` equals ``item``.
+
+        Raises ValueError if no option has that value, or if the
+        matching option is not currently selected.
+        """
+        for option in self.options:
+            if option.get('value') == item:
+                if 'selected' in option.attrib:
+                    del option.attrib['selected']
+                else:
+                    raise ValueError(
+                        "The option %r is not currently selected" % item)
+                break
+        else:
+            # Message wording now matches the one raised by add().
+            raise ValueError(
+                "There is no option with the value %r" % item)
+
+    def __repr__(self):
+        return '<%s {%s} for select name=%r>' % (
+            self.__class__.__name__,
+            ', '.join([repr(v) for v in self]),
+            self.select.name)
+
+class RadioGroup(list):
+ """
+ This object represents several ``<input type=radio>`` elements
+ that have the same name.
+
+ You can use this like a list, but also use the property
+ ``.value`` to check/uncheck inputs. Also you can use
+ ``.value_options`` to get the possible values.
+ """
+
+ def value__get(self):
+ """
+ Get/set the value, which checks the radio with that value (and
+ unchecks any other value).
+ """
+ for el in self:
+ if 'checked' in el.attrib:
+ return el.get('value')
+ return None
+
+ def value__set(self, value):
+ if value is not None:
+ for el in self:
+ if el.get('value') == value:
+ checked_option = el
+ break
+ else:
+ raise ValueError(
+ "There is no radio input with the value %r" % value)
+ for el in self:
+ if 'checked' in el.attrib:
+ del el.attrib['checked']
+ if value is not None:
+ checked_option.set('checked', '')
+
+ def value__del(self):
+ self.value = None
+
+ value = property(value__get, value__set, value__del, doc=value__get.__doc__)
+
+ def value_options(self):
+ """
+ Returns a list of all the possible values.
+ """
+ return [el.get('value') for el in self]
+ value_options = property(value_options, doc=value_options.__doc__)
+
+ def __repr__(self):
+ return '%s(%s)' % (
+ self.__class__.__name__,
+ list.__repr__(self))
+
+class CheckboxGroup(list):
+ """
+ Represents a group of checkboxes (``<input type=checkbox>``) that
+ have the same name.
+
+ In addition to using this like a list, the ``.values`` attribute
+ returns a set-like object that you can add to or remove from to
+ check and uncheck checkboxes. You can also use ``.value_options``
+ to get the possible values.
+ """
+
+    ## FIXME: should this be named .value?
+    def values__get(self):
+        """
+        Return a set-like object that can be modified to check or
+        uncheck individual checkboxes according to their value.
+        """
+        return CheckboxValues(self)
+    def values__set(self, value):
+        # Take a snapshot first: ``value`` may itself be a live view of
+        # this group, which clear() would empty before it is read.
+        value = list(value)
+        self.values.clear()
+        # Use update() rather than ``|=``: augmented assignment on an
+        # attribute assigns the result back, which would re-enter this
+        # setter.
+        self.values.update(value)
+    def values__del(self):
+        self.values.clear()
+    values = property(values__get, values__set, values__del, doc=values__get.__doc__)
+
+ def __repr__(self):
+ return '%s(%s)' % (
+ self.__class__.__name__, list.__repr__(self))
+
+class CheckboxValues(SetMixin):
+
+ """
+ Represents the values of the checked checkboxes in a group of
+ checkboxes with the same name.
+ """
+
+ def __init__(self, group):
+ self.group = group
+
+ def __iter__(self):
+ return iter([
+ el.get('value')
+ for el in self.group
+ if 'checked' in el.attrib])
+
+ def add(self, value):
+ for el in self.group:
+ if el.get('value') == value:
+ el.set('checked', '')
+ break
+ else:
+ raise KeyError("No checkbox with value %r" % value)
+
+ def remove(self, value):
+ for el in self.group:
+ if el.get('value') == value:
+ if 'checked' in el.attrib:
+ del el.attrib['checked']
+ else:
+ raise KeyError(
+ "The checkbox with value %r was already unchecked" % value)
+ break
+ else:
+ raise KeyError(
+ "No checkbox with value %r" % value)
+
+ def __repr__(self):
+ return '<%s {%s} for checkboxes name=%r>' % (
+ self.__class__.__name__,
+ ', '.join([repr(v) for v in self]),
+ self.group.name)
+
+class InputElement(InputMixin, HtmlElement):
+ """
+ Represents an ``<input>`` element.
+
+ You can get the type with ``.type`` (which is lower-cased and
+ defaults to ``'text'``).
+
+ Also you can get and set the value with ``.value``
+
+ Checkboxes and radios have the attribute ``input.checkable ==
+ True`` (for all others it is false) and a boolean attribute
+ ``.checked``.
+
+ """
+
+ ## FIXME: I'm a little uncomfortable with the use of .checked
+ def value__get(self):
+ """
+ Get/set the value of this element, using the ``value`` attribute.
+
+ Also, if this is a checkbox and it has no value, this defaults
+ to ``'on'``. If it is a checkbox or radio that is not
+ checked, this returns None.
+ """
+ if self.checkable:
+ if self.checked:
+ return self.get('value') or 'on'
+ else:
+ return None
+ return self.get('value')
+ def value__set(self, value):
+ if self.checkable:
+ if not value:
+ self.checked = False
+ else:
+ self.checked = True
+ if isinstance(value, basestring):
+ self.set('value', value)
+ else:
+ self.set('value', value)
+ def value__del(self):
+ if self.checkable:
+ self.checked = False
+ else:
+ if 'value' in self.attrib:
+ del self.attrib['value']
+ value = property(value__get, value__set, value__del, doc=value__get.__doc__)
+
+ def type__get(self):
+ """
+ Return the type of this element (using the type attribute).
+ """
+ return self.get('type', 'text').lower()
+ def type__set(self, value):
+ self.set('type', value)
+ type = property(type__get, type__set, doc=type__get.__doc__)
+
+    def checkable__get(self):
+        """
+        Boolean: can this element be checked?
+        """
+        return self.type in ['checkbox', 'radio']
+    # The docstring must come from the getter: the name ``checkable``
+    # does not exist yet while this statement is being evaluated.
+    checkable = property(checkable__get, doc=checkable__get.__doc__)
+
+ def checked__get(self):
+ """
+ Boolean attribute to get/set the presence of the ``checked``
+ attribute.
+
+ You can only use this on checkable input types.
+ """
+ if not self.checkable:
+ raise AttributeError('Not a checkable input type')
+ return 'checked' in self.attrib
+ def checked__set(self, value):
+ if not self.checkable:
+ raise AttributeError('Not a checkable input type')
+ if value:
+ self.set('checked', '')
+ else:
+ if 'checked' in self.attrib:
+ del self.attrib['checked']
+ checked = property(checked__get, checked__set, doc=checked__get.__doc__)
+
+HtmlLookup._elements['input'] = InputElement
+
+class LabelElement(HtmlElement):
+ """
+ Represents a ``<label>`` element.
+
+ Label elements are linked to other elements with their ``for``
+ attribute. You can access this element with ``label.for_element``.
+ """
+
+    def for_element__get(self):
+        """
+        Get/set the element this label points to.  Return None if it
+        can't be found.
+        """
+        id = self.get('for')
+        if not id:
+            return None
+        return self.body.get_element_by_id(id)
+    def for_element__set(self, other):
+        # A label refers to its target by id, so the target needs one.
+        id = other.get('id')
+        if not id:
+            raise TypeError(
+                "Element %r has no id attribute" % other)
+        self.set('for', id)
+    def for_element__del(self):
+        # The link is stored in ``for`` (see the setter); the label's
+        # own ``id`` attribute is unrelated and is left untouched.
+        if 'for' in self.attrib:
+            del self.attrib['for']
+    for_element = property(for_element__get, for_element__set, for_element__del,
+                           doc=for_element__get.__doc__)
+
+HtmlLookup._elements['label'] = LabelElement
+
############################################################
## Serialization
############################################################
@@ -488,3 +1140,19 @@
if not include_meta_content_type:
html = __replace_meta_content_type('', html)
return html
+
+def open_in_browser(doc):
+    """
+    Open the HTML document in a web browser (saving it to a temporary
+    file to open it).
+
+    The temporary file is not removed afterwards; its ``file://`` URL
+    is printed before the browser is launched.
+    """
+    import os
+    import tempfile
+    import webbrowser
+    # mkstemp() creates and opens the file in one step; os.tempnam()
+    # only produced a name, leaving a window in which another process
+    # could create that path first.
+    handle, fn = tempfile.mkstemp(suffix='.html')
+    f = os.fdopen(handle, 'wb')
+    try:
+        f.write(tostring(doc, include_meta_content_type=True))
+    finally:
+        # Close the handle even if serialization or the write fails.
+        f.close()
+    url = 'file://' + fn.replace(os.path.sep, '/')
+    print url
+    webbrowser.open(url)
+
Added: lxml/branch/html/src/lxml/html/setmixin.py
==============================================================================
--- (empty file)
+++ lxml/branch/html/src/lxml/html/setmixin.py Mon Jul 16 08:13:09 2007
@@ -0,0 +1,104 @@
+class SetMixin(object):
+
+ """
+ Mix-in for sets. You must define __iter__, add, remove
+ """
+
+ def __len__(self):
+ return len(list(self))
+
+ def __contains__(self, item):
+ for has_item in self:
+ if item == has_item:
+ return True
+ return False
+
+    def issubset(self, other):
+        """
+        Return True if every item of this set is also in ``other``.
+        """
+        for item in self:
+            if item not in other:
+                return False
+        return True
+
+    __le__ = issubset
+
+    def issuperset(self, other):
+        """
+        Return True if every item of ``other`` is also in this set.
+        """
+        for item in other:
+            if item not in self:
+                return False
+        return True
+
+    __ge__ = issuperset
+
+ def union(self, other):
+ return self | other
+
+ def __or__(self, other):
+ new = self.copy()
+ new |= other
+ return new
+
+ def intersection(self, other):
+ return self & other
+
+ def __and__(self, other):
+ new = self.copy()
+ new &= other
+ return new
+
+ def difference(self, other):
+ return self - other
+
+ def __sub__(self, other):
+ new = self.copy()
+ new -= other
+ return new
+
+ def symmetric_difference(self, other):
+ return self ^ other
+
+ def __xor__(self, other):
+ new = self.copy()
+ new ^= other
+ return new
+
+ def copy(self):
+ return set(self)
+
+    def update(self, other):
+        """
+        Add every item of ``other`` to this set.
+        """
+        for item in other:
+            self.add(item)
+
+    def __ior__(self, other):
+        # ``s |= x`` rebinds ``s`` to whatever this returns, so hand back
+        # the set itself; aliasing update() here rebound ``s`` to None.
+        self.update(other)
+        return self
+
+    def intersection_update(self, other):
+        """
+        Remove every item that is not also in ``other``.
+        """
+        # Iterate over a snapshot, because remove() changes what
+        # iterating over ``self`` produces.
+        for item in list(self):
+            if item not in other:
+                self.remove(item)
+
+    def __iand__(self, other):
+        self.intersection_update(other)
+        return self
+
+    def difference_update(self, other):
+        """
+        Remove every item that is also in ``other``.
+        """
+        for item in other:
+            if item in self:
+                self.remove(item)
+
+    def __isub__(self, other):
+        self.difference_update(other)
+        return self
+
+    def symmetric_difference_update(self, other):
+        """
+        Toggle each item of ``other``: remove it if present, add it
+        otherwise.
+        """
+        for item in other:
+            if item in self:
+                self.remove(item)
+            else:
+                self.add(item)
+
+    def __ixor__(self, other):
+        self.symmetric_difference_update(other)
+        return self
+
+ def discard(self, item):
+ try:
+ self.remove(item)
+ except KeyError:
+ pass
+
+ def clear(self):
+ for item in list(self):
+ self.remove(item)
Added: lxml/branch/html/src/lxml/html/tests/test_forms.py
==============================================================================
--- (empty file)
+++ lxml/branch/html/src/lxml/html/tests/test_forms.py Mon Jul 16 08:13:09 2007
@@ -0,0 +1,10 @@
+import unittest
+from lxml.tests.common_imports import doctest
+
+def test_suite():
+ suite = unittest.TestSuite()
+ suite.addTests([doctest.DocFileSuite('test_forms.txt')])
+ return suite
+
+if __name__ == '__main__':
+ unittest.main()
Added: lxml/branch/html/src/lxml/html/tests/test_forms.txt
==============================================================================
--- (empty file)
+++ lxml/branch/html/src/lxml/html/tests/test_forms.txt Mon Jul 16 08:13:09 2007
@@ -0,0 +1,123 @@
+>>> from lxml.html import usedoctest
+>>> from lxml.html import fromstring, tostring
+>>> h = fromstring('''<html><body>
+... <form action="test">
+... <input type="hidden" name="hidden_field" value="hidden_value">
+... <input type="text" name="text_field" value="text_value">
+... <input type="checkbox" name="single_checkbox">
+... <input type="checkbox" name="single_checkbox2" value="good">
+... <input type="checkbox" name="check_group" value="1">
+... <input type="checkbox" name="check_group" value="2" checked>
+... <input type="checkbox" name="check_group" value="3" checked>
+... <input type="checkbox" name="check_group" value="4">
+... <textarea name="textarea_field">some text</textarea>
+... <label for="value1">value 1</label>
+... <input type="radio" name="radios" value="value1" id="value1">
+... <label for="value2">value 2</label>
+... <input type="radio" name="radios" value="value2" id="value2">
+... <label for="value3">value 3</label>
+... <input type="radio" name="radios" value="value3" id="value3" checked>
+... <select name="select1">
+... <option>No value</option>
+... <option value="">Empty</option>
+... <option value="1">number 1</option>
+... </select>
+... <select name="select2" multiple>
+... <option value="1">number 1</option>
+... <option value="2">number 2</option>
+... <option value="3">number 3</option>
+... </select>
+... <input type="submit" name="submit1" value="submit">
+... <input type="submit" name="submit2" value="submit">
+... <input type="reset" name="reset1">linksys
+... </form>
+... </body></html>''')
+>>> f = h.forms[0]
+>>> f.action
+'test'
+>>> f.method
+'GET'
+>>> f.inputs
+<InputGetter for form 0>
+>>> hidden = f.inputs['hidden_field']
+>>> hidden.checkable
+False
+>>> hidden.value
+'hidden_value'
+>>> hidden.value = 'new value'
+>>> print tostring(hidden)
+<input type="hidden" name="hidden_field" value="new value">
+>>> checkbox = f.inputs['single_checkbox']
+>>> checkbox.checkable
+True
+>>> checkbox.type
+'checkbox'
+>>> checkbox.checked
+False
+>>> print checkbox.value
+None
+>>> checkbox.checked = True
+>>> checkbox.value
+'on'
+>>> print tostring(checkbox)
+<input type="checkbox" name="single_checkbox" checked>
+>>> checkbox2 = f.inputs['single_checkbox2']
+>>> checkbox2.checked = True
+>>> checkbox2.value
+'good'
+>>> group = f.inputs['check_group']
+>>> group.values
+<CheckboxValues {'2', '3'} for checkboxes name='check_group'>
+>>> group.values.add('1')
+>>> group.values
+<CheckboxValues {'1', '2', '3'} for checkboxes name='check_group'>
+>>> print tostring(group[0])
+<input type="checkbox" name="check_group" value="1" checked>
+>>> group.values.add('doesnotexist')
+Traceback (most recent call last):
+ ...
+KeyError: "No checkbox with value 'doesnotexist'"
+>>> textarea = f.inputs['textarea_field']
+>>> textarea.value
+'some text'
+>>> radios = f.inputs['radios']
+>>> radios[0].label.text
+'value 1'
+>>> radios.value
+'value3'
+>>> radios.value = 'value1'
+>>> radios.value
+'value1'
+>>> print tostring(radios[0])
+<input type="radio" name="radios" value="value1" id="value1" checked>
+>>> radios.value = None
+>>> print tostring(radios[0])
+<input type="radio" name="radios" value="value1" id="value1">
+>>> radios.value_options
+['value1', 'value2', 'value3']
+>>> select = f.inputs['select1']
+>>> print select.value
+None
+>>> select.value = ""
+>>> select.value
+''
+>>> select.value = 'asdf'
+Traceback (most recent call last):
+ ...
+ValueError: There is no option with the value of 'asdf'
+>>> select.value_options
+[None, '', '1']
+>>> select = f.inputs['select2']
+>>> select.value
+<MultipleSelectOptions {'1', '2', '3'} for select name='select2'>
+>>> select.value.update(['2', '3'])
+>>> select.value.remove('3')
+>>> select.value.add('asdf')
+Traceback (most recent call last):
+ ...
+ValueError: There is no option with the value 'asdf'
+>>> select.value_options
+['1', '2', '3']
+>>> import urllib
+>>> print urllib.urlencode(f.form_values())
+hidden_field=new+value&text_field=text_value&single_checkbox=on&single_checkbox2=good&check_group=1&check_group=2&check_group=3&textarea_field=some+text&select1=&select2=1&select2=2&select2=3
More information about the lxml-checkins
mailing list