[Lxml-checkins] r45121 - in lxml/branch/html/src/lxml/html: . tests

ianb at codespeak.net ianb at codespeak.net
Mon Jul 16 08:13:11 CEST 2007


Author: ianb
Date: Mon Jul 16 08:13:09 2007
New Revision: 45121

Added:
   lxml/branch/html/src/lxml/html/setmixin.py
   lxml/branch/html/src/lxml/html/tests/test_forms.py
   lxml/branch/html/src/lxml/html/tests/test_forms.txt
Modified:
   lxml/branch/html/src/lxml/html/__init__.py
Log:
Add special handling for form and input elements:
* New classes for form, input, select, textarea, and label elements
* Any element can query for its label, labels know what they point to
* input elements know their name and value
* form elements have pointers to their input elements

Also an accessor for head and body and the page's forms.  

Also a debugging function, to open a document in a web browser.


Modified: lxml/branch/html/src/lxml/html/__init__.py
==============================================================================
--- lxml/branch/html/src/lxml/html/__init__.py	(original)
+++ lxml/branch/html/src/lxml/html/__init__.py	Mon Jul 16 08:13:09 2007
@@ -5,10 +5,11 @@
 from lxml import etree
 from lxml.html import defs
 from lxml import cssselect
+from lxml.html.setmixin import SetMixin
 
 __all__ = ['document_fromstring', 'tostring', 'Element', 'defs',
            'find_rel_links', 'find_class', 'make_links_absolute',
-           'resolve_base_href', 'iterlinks', 'rewrite_links']
+           'resolve_base_href', 'iterlinks', 'rewrite_links', 'open_in_browser']
 
 _rel_links_xpath = etree.XPath("descendant-or-self::a[@rel]")
 #_class_xpath = etree.XPath(r"descendant-or-self::*[regexp:match(@class, concat('\b', $class_name, '\b'))]", {'regexp': 'http://exslt.org/regular-expressions'})
@@ -17,9 +18,62 @@
 _collect_string_content = etree.XPath("string()")
 _css_url_re = re.compile(r'url\((.*?)\)', re.I)
 _css_import_re = re.compile(r'@import "(.*?)"')
+_label_xpath = etree.XPath("//label[@for=$id]")
 
 class HtmlMixin(object):
 
+    def forms(self):
+        """
+        Return a list of the <form> elements found by ``getiterator('form')``.
+        """
+        return list(self.getiterator('form'))
+    forms = property(forms, doc=forms.__doc__)
+
+    def body(self):
+        """
+        Return the <body> element.  Can be called from a child element
+        to get the document's body.
+        """
+        return self.xpath('//body')[0]
+    body = property(body, doc=body.__doc__)
+
+    def head(self):
+        """
+        Return the <head> element.  Can be called from a child
+        element to get the document's head.
+        """
+        return self.xpath('//head')[0]
+    head = property(head, doc=head.__doc__)
+
+    def label__get(self):
+        """
+        Get, set or delete the <label> whose ``for`` matches this element's id.
+        """
+        id = self.get('id')
+        if not id:
+            return None
+        result = _label_xpath(self, id=id)
+        if not result:
+            return None
+        else:
+            return result[0]
+    def label__set(self, label):
+        id = self.get('id')
+        if not id:
+            raise TypeError(
+                "You cannot set a label for an element (%r) that has no id"
+                % self)
+        if not label.tag == 'label':
+            raise TypeError(
+                "You can only assign label to a label element (not %r)"
+                % label)
+        label.set('for', id)
+    def label__del(self):
+        label = self.label
+        if label is not None:
+            del label.attrib['for']
+    label = property(label__get, label__set, label__del, doc=label__get.__doc__)
+
     def drop_tree(self):
         """
         Removes this element from the tree, including its children and
@@ -232,12 +286,6 @@
     works in-place (and so returns None) it returns a serialized form
     of the resulting document.
     """
-    # FIXME: the None test is a bit sloppy FIXME: this is basically
-    # functional if you use it with a string; should it be a
-    # functional equivalent for working with elements too?  It has to
-    # make a copy of the document.  The problem is it changes the
-    # return type, as it should return the copied document and not a
-    # serialization.  Is that odd?
     def __init__(self, name, copy=False, source_class=HtmlMixin):
         self.name = name
         self.copy = copy
@@ -259,6 +307,7 @@
                 doc = copy.deepcopy(doc)
         meth = getattr(doc, self.name)
         result = meth(*args, **kw)
+        # FIXME: this None test is a bit sloppy 
         if result is None:
             # Then return what we got in
             if return_string:
@@ -287,13 +336,27 @@
 class HtmlEntity(etree.EntityBase, HtmlMixin):
     pass
 
+
+class HtmlLookup(etree.CustomElementClassLookup):
+    _elements = {}
+
+    def lookup(self, node_type, document, namespace, name):
+        if node_type == 'element':
+            return self._elements.get(name, HtmlElement)
+        elif node_type == 'comment':
+            return HtmlComment
+        elif node_type == 'PI':
+            return HtmlProcessingInstruction
+        elif node_type == 'entity':
+            return HtmlEntity
+        # Unknown node type: return None to defer to the normal lookup
+        return None
+    
+
 html_parser = etree.HTMLParser()
-html_parser.setElementClassLookup(etree.ElementDefaultClassLookup(
-    element=HtmlElement, comment=HtmlComment,
-    pi=HtmlProcessingInstruction, entity=HtmlEntity))
+html_parser.setElementClassLookup(HtmlLookup())
 
 def document_fromstring(html):
-    # FIXME: should this notice a fragment and parse accordingly?
     value = etree.HTML(html, html_parser)
     if value is None:
         raise etree.ParserError(
@@ -312,7 +375,6 @@
     # FIXME: check what happens when you give html with a body, head, etc.
     start = html[:20].lstrip().lower()
     if not start.startswith('<html') and not start.startswith('<!doctype'):
-        # FIXME: That test doesn't work with a doctype or PI
         html = '<html><body>%s</body></html>' % html
     doc = document_fromstring(html)
     assert doc.tag == 'html'
@@ -414,6 +476,12 @@
         body.tag = 'span'
     return body
 
+def parse(filename):
+    """
+    Parse a filename, URL, or file-like object as HTML using ``html_parser``.
+    """
+    return etree.parse(filename, html_parser)
+
 def _contains_block_level_tag(el):
     # FIXME: I could do this with XPath, but would that just be
     # unnecessarily slow?
@@ -434,6 +502,590 @@
     v = html_parser.makeelement(*args, **kw)
     return v
 
+class FormElement(HtmlElement):
+    """
+    Represents a <form> element.
+    """
+
+    def inputs(self):
+        """
+        Returns an accessor for all the input elements in the form.
+
+        See `InputGetter` for more information about the object.
+        """
+        return InputGetter(self)
+    inputs = property(inputs, doc=inputs.__doc__)
+
+    def _name(self):
+        if self.get('name'):
+            return self.get('name')
+        elif self.get('id'):
+            return '#' + self.get('id')
+        return str(list(self.body.getiterator('form')).index(self))
+
+    def form_values(self):
+        """
+        Return a list of tuples of the field values for the form.
+        This is suitable to be passed to ``urllib.urlencode()``.
+        """
+        results = []
+        for el in self.inputs:
+            name = el.name
+            if not name:
+                continue
+            if el.tag == 'textarea':
+                results.append((name, el.value))
+            elif el.tag == 'select':
+                value = el.value
+                if el.multiple:
+                    for v in value:
+                        results.append((name, v))
+                elif value is not None:
+                    results.append((name, el.value))
+            else:
+                assert el.tag == 'input', (
+                    "Unexpected tag: %r" % el)
+                if el.checkable and not el.checked:
+                    continue
+                if el.type in ('submit', 'image', 'reset'):
+                    continue
+                value = el.value
+                if value is not None:
+                    results.append((name, el.value))
+        return results
+
+    def action__get(self):
+        """
+        Get/set the form's ``action`` attribute.
+        """
+        return self.get('action')
+    def action__set(self, value):
+        self.set('action', value)
+    def action__del(self):
+        if 'action' in self.attrib:
+            del self.attrib['action']
+    action = property(action__get, action__set, action__del, doc=action__get.__doc__)
+
+    def method__get(self):
+        """
+        Get/set the form's method.  Always returns an upper-case
+        string, and defaults to ``'GET'``
+        """
+        return self.get('method', 'GET').upper()
+    def method__set(self, value):
+        self.set('method', value.upper())
+    method = property(method__get, method__set, doc=method__get.__doc__)
+
+    def submit(self, extra_values=None):
+        """
+        Submit the form.  Returns a file-like object, from
+        ``urllib.urlopen()``.  This object also has a ``.geturl()`` function,
+        which shows the URL if there were any redirects.
+
+        You can use this like::
+
+            >>> form = doc.forms[0]
+            >>> form.inputs['foo'].value = 'bar' # etc
+            >>> response = form.submit()
+            >>> doc = parse(response)
+            >>> doc.make_links_absolute(response.geturl())
+        """
+        values = self.form_values()
+        if extra_values:
+            if hasattr(extra_values, 'items'):
+                extra_values = extra_values.items()
+            values.extend(extra_values)
+        import urllib
+        action = self.action
+        ## FIXME: should test that it's not a relative URL or something
+        if self.method == 'GET':
+            if '?' in action:
+                action += '&'
+            else:
+                action += '?'
+            action += urllib.urlencode(values)
+            data = None
+        else:
+            data = urllib.urlencode(values)
+        return urllib.urlopen(action, data)
+
+HtmlLookup._elements['form'] = FormElement
+
+class InputGetter(object):
+
+    """
+    An accessor that represents all the input fields in a form.
+
+    You can get fields by name from this, with
+    ``form.inputs['field_name']``.  If there is a set of checkboxes
+    with the same name, they are returned as a list (a `CheckboxGroup`
+    which also allows value setting).  Radio inputs are handled
+    similarly.
+
+    You can also iterate over this to get all input elements.  This
+    won't return the same thing as if you get all the names, as
+    checkboxes and radio elements are returned individually.
+    """
+
+    _name_xpath = etree.XPath(".//*[@name = $name and (name(.) = 'select' or name(.) = 'input' or name(.) = 'textarea')]")
+    _all_xpath = etree.XPath(".//*[name() = 'select' or name() = 'input' or name() = 'textarea']")
+
+    def __init__(self, form):
+        self.form = form
+
+    def __repr__(self):
+        return '<%s for form %s>' % (
+            self.__class__.__name__,
+            self.form._name())
+
+    ## FIXME: there should be more methods, and it's unclear if this is
+    ## a dictionary-like object or list-like object
+
+    def __getitem__(self, name):
+        results = self._name_xpath(self.form, name=name)
+        if results:
+            type = results[0].get('type')
+            if type == 'radio' and len(results) > 1:
+                group = RadioGroup(results)
+                group.name = name
+                return group
+            elif type == 'checkbox' and len(results) > 1:
+                group = CheckboxGroup(results)
+                group.name = name
+                return group
+            else:
+                # Only the first match is returned; any others are dropped
+                return results[0]
+        else:
+            raise KeyError(
+                "No input element with the name %r" % name)
+
+    def __iter__(self):
+        ## FIXME: kind of dumb to turn a list into an iterator, only
+        ## to have it likely turned back into a list again :(
+        return iter(self._all_xpath(self.form))
+
+class InputMixin(object):
+
+    """
+    Mix-in for all input elements (input, select, and textarea)
+    """
+
+
+    def name__get(self):
+        """
+        Get/set/delete the ``name`` attribute of the element
+        """
+        return self.get('name')
+    def name__set(self, value):
+        self.set('name', value)
+    def name__del(self):
+        if 'name' in self.attrib:
+            del self.attrib['name']
+    name = property(name__get, name__set, name__del, doc=name__get.__doc__)
+
+    def __repr__(self):
+        type = getattr(self, 'type', None)
+        if type:
+            type = ' type=%r' % type
+        else:
+            type = ''
+        return '<%s %x name=%r%s>' % (
+            self.__class__.__name__, id(self), self.name, type)
+    
+class TextareaElement(InputMixin, HtmlElement):
+    """
+    ``<textarea>`` element.  You can get the name with ``.name`` and
+    get/set the value with ``.value``
+    """
+
+    def value__get(self):
+        """
+        Get/set the value (the element's text, or ``''`` when it has none)
+        """
+        return self.text or ''
+    def value__set(self, value):
+        self.text = value
+    def value__del(self):
+        self.text = ''
+    value = property(value__get, value__set, value__del, doc=value__get.__doc__)
+
+HtmlLookup._elements['textarea'] = TextareaElement
+
+class SelectElement(InputMixin, HtmlElement):
+    """
+    ``<select>`` element.  You can get the name with ``.name``.
+
+    ``.value`` will be the value of the selected option, unless this
+    is a multi-select element (``<select multiple>``), in which case
+    it will be a set-like object.  In either case ``.value_options``
+    gives the possible values.
+
+    The boolean attribute ``.multiple`` shows if this is a
+    multi-select.
+    """
+
+    def value__get(self):
+        """
+        Get/set the value of this select (the selected option).
+
+        If this is a multi-select, this is a set-like object that
+        represents all the selected options.
+        """
+        if self.multiple:
+            return MultipleSelectOptions(self)
+        for el in self.getiterator('option'):
+            if 'selected' in el.attrib:
+                value = el.get('value')
+                # FIXME: If value is None, what to return?, get_text()?
+                return value
+        return None
+
+    def value__set(self, value):
+        if self.multiple:
+            if isinstance(value, basestring):
+                raise TypeError(
+                    "You must pass in a sequence")
+            self.value.clear()
+            self.value.update(value)
+            return
+        if value is not None:
+            for el in self.getiterator('option'):
+                # FIXME: also if el.get('value') is None?
+                if el.get('value') == value:
+                    checked_option = el
+                    break
+            else:
+                raise ValueError(
+                    "There is no option with the value of %r" % value)
+        for el in self.getiterator('option'):
+            if 'selected' in el.attrib:
+                del el.attrib['selected']
+        if value is not None:
+            checked_option.set('selected', '')
+
+    def value__del(self):
+        # FIXME: should del be allowed at all?
+        if self.multiple:
+            self.value.clear()
+        else:
+            self.value = None
+
+    value = property(value__get, value__set, value__del, doc=value__get.__doc__)
+
+    def value_options(self):
+        """
+        All the possible values this select can have (the ``value``
+        attribute of all the ``<option>`` elements).
+        """
+        return [el.get('value') for el in self.getiterator('option')]
+    value_options = property(value_options, doc=value_options.__doc__)
+
+    def multiple__get(self):
+        """
+        Boolean attribute: is there a ``multiple`` attribute on this element.
+        """
+        return 'multiple' in self.attrib
+    def multiple__set(self, value):
+        if value:
+            self.set('multiple', '')
+        elif 'multiple' in self.attrib:
+            del self.attrib['multiple']
+    multiple = property(multiple__get, multiple__set, doc=multiple__get.__doc__)
+
+HtmlLookup._elements['select'] = SelectElement
+
+class MultipleSelectOptions(SetMixin):
+    """
+    Set-like view of the options of a ``<select multiple>`` element: add
+    a value to select its option, remove a value to unselect it.
+    NOTE(review): ``__iter__`` yields every option's value, selected or
+    not, so ``clear()`` raises ValueError on unselected options -- confirm.
+    """
+
+    def __init__(self, select):
+        self.select = select
+
+    def options(self):
+        """
+        Iterator of all the ``<option>`` elements.
+        """
+        return self.select.getiterator('option')
+    options = property(options)
+
+    def __iter__(self):
+        for option in self.options:
+            yield option.get('value')
+
+    def add(self, item):
+        for option in self.options:
+            if option.get('value') == item:
+                option.set('selected', '')
+                break
+        else:
+            raise ValueError(
+                "There is no option with the value %r" % item)
+
+    def remove(self, item):
+        for option in self.options:
+            if option.get('value') == item:
+                if 'selected' in option.attrib:
+                    del option.attrib['selected']
+                else:
+                    raise ValueError(
+                        "The option %r is not currently selected" % item)
+                break
+        else:
+            raise ValueError(
+                "There is no option with the value %r" % item)
+
+    def __repr__(self):
+        return '<%s {%s} for select name=%r>' % (
+            self.__class__.__name__,
+            ', '.join([repr(v) for v in self]),
+            self.select.name)
+
+class RadioGroup(list):
+    """
+    This object represents several ``<input type=radio>`` elements
+    that have the same name.
+
+    You can use this like a list, but also use the property
+    ``.value`` to check/uncheck inputs.  Also you can use
+    ``.value_options`` to get the possible values.
+    """
+
+    def value__get(self):
+        """
+        Get/set the value: setting checks the radio with that value and
+        unchecks the others; None means (or sets) no radio checked.
+        """
+        for el in self:
+            if 'checked' in el.attrib:
+                return el.get('value')
+        return None
+
+    def value__set(self, value):
+        if value is not None:
+            for el in self:
+                if el.get('value') == value:
+                    checked_option = el
+                    break
+            else:
+                raise ValueError(
+                    "There is no radio input with the value %r" % value)
+        for el in self:
+            if 'checked' in el.attrib:
+                del el.attrib['checked']
+        if value is not None:
+            checked_option.set('checked', '')
+
+    def value__del(self):
+        self.value = None
+
+    value = property(value__get, value__set, value__del, doc=value__get.__doc__)
+
+    def value_options(self):
+        """
+        Returns a list of all the possible values.
+        """
+        return [el.get('value') for el in self]
+    value_options = property(value_options, doc=value_options.__doc__)
+
+    def __repr__(self):
+        return '%s(%s)' % (
+            self.__class__.__name__,
+            list.__repr__(self))
+
+class CheckboxGroup(list):
+    """
+    Represents a group of checkboxes (``<input type=checkbox>``) that
+    have the same name.
+
+    In addition to using this like a list, the ``.values`` attribute
+    returns a set-like object that you can add to or remove from to
+    check and uncheck checkboxes.  You can also use ``.value_options``
+    to get the possible values.
+    """
+
+    ## FIXME: should this be named .value?
+    def values__get(self):
+        """
+        Return a set-like object that can be modified to check or
+        uncheck individual checkboxes according to their value.
+        """
+        return CheckboxValues(self)
+    def values__set(self, value):
+        self.values.clear()
+        self.values.update(value)  # not ``|=``: that re-assigns the property
+    def values__del(self):
+        self.values.clear()
+    values = property(values__get, values__set, values__del, doc=values__get.__doc__)
+
+    def __repr__(self):
+        return '%s(%s)' % (
+            self.__class__.__name__, list.__repr__(self))
+
+class CheckboxValues(SetMixin):
+
+    """
+    Set-like view of the values of the checked checkboxes in a group of
+    checkboxes with the same name; add/remove check or uncheck a box.
+    """
+
+    def __init__(self, group):
+        self.group = group
+
+    def __iter__(self):
+        return iter([
+            el.get('value')
+            for el in self.group
+            if 'checked' in el.attrib])
+
+    def add(self, value):
+        for el in self.group:
+            if el.get('value') == value:
+                el.set('checked', '')
+                break
+        else:
+            raise KeyError("No checkbox with value %r" % value)
+
+    def remove(self, value):
+        for el in self.group:
+            if el.get('value') == value:
+                if 'checked' in el.attrib:
+                    del el.attrib['checked']
+                else:
+                    raise KeyError(
+                        "The checkbox with value %r was already unchecked" % value)
+                break
+        else:
+            raise KeyError(
+                "No checkbox with value %r" % value)
+
+    def __repr__(self):
+        return '<%s {%s} for checkboxes name=%r>' % (
+            self.__class__.__name__,
+            ', '.join([repr(v) for v in self]),
+            self.group.name)
+
+class InputElement(InputMixin, HtmlElement):
+    """
+    Represents an ``<input>`` element.
+
+    You can get the type with ``.type`` (which is lower-cased and
+    defaults to ``'text'``).
+
+    Also you can get and set the value with ``.value``
+
+    Checkboxes and radios have the attribute ``input.checkable ==
+    True`` (for all others it is false) and a boolean attribute
+    ``.checked``.
+
+    """
+
+    ## FIXME: I'm a little uncomfortable with the use of .checked
+    def value__get(self):
+        """
+        Get/set the value of this element, using the ``value`` attribute.
+
+        Also, if this is a checkbox and it has no value, this defaults
+        to ``'on'``.  If it is a checkbox or radio that is not
+        checked, this returns None.
+        """
+        if self.checkable:
+            if self.checked:
+                return self.get('value') or 'on'
+            else:
+                return None
+        return self.get('value')
+    def value__set(self, value):
+        if self.checkable:
+            if not value:
+                self.checked = False
+            else:
+                self.checked = True
+                if isinstance(value, basestring):
+                    self.set('value', value)
+        else:
+            self.set('value', value)
+    def value__del(self):
+        if self.checkable:
+            self.checked = False
+        else:
+            if 'value' in self.attrib:
+                del self.attrib['value']
+    value = property(value__get, value__set, value__del, doc=value__get.__doc__)
+
+    def type__get(self):
+        """
+        Return the type of this element (using the type attribute).
+        """
+        return self.get('type', 'text').lower()
+    def type__set(self, value):
+        self.set('type', value)
+    type = property(type__get, type__set, doc=type__get.__doc__)
+
+    def checkable__get(self):
+        """
+        Boolean: can this element be checked?
+        """
+        return self.type in ['checkbox', 'radio']
+    checkable = property(checkable__get, doc=checkable__get.__doc__)
+
+    def checked__get(self):
+        """
+        Boolean attribute to get/set the presence of the ``checked``
+        attribute.
+
+        You can only use this on checkable input types.
+        """
+        if not self.checkable:
+            raise AttributeError('Not a checkable input type')
+        return 'checked' in self.attrib
+    def checked__set(self, value):
+        if not self.checkable:
+            raise AttributeError('Not a checkable input type')
+        if value:
+            self.set('checked', '')
+        else:
+            if 'checked' in self.attrib:
+                del self.attrib['checked']
+    checked = property(checked__get, checked__set, doc=checked__get.__doc__)
+
+HtmlLookup._elements['input'] = InputElement
+
+class LabelElement(HtmlElement):
+    """
+    Represents a ``<label>`` element.
+
+    Label elements are linked to other elements with their ``for``
+    attribute.  You can access this element with ``label.for_element``.
+    """
+
+    def for_element__get(self):
+        """
+        Get/set/delete the element this label points to (through its
+        ``for`` attribute).  Returns None if the label has no ``for``.
+        """
+        id = self.get('for')
+        if not id:
+            return None
+        return self.body.get_element_by_id(id)
+    def for_element__set(self, other):
+        id = other.get('id')
+        if not id:
+            raise TypeError(
+                "Element %r has no id attribute" % other)
+        self.set('for', id)
+    def for_element__del(self):
+        if 'for' in self.attrib:
+            del self.attrib['for']
+    for_element = property(for_element__get, for_element__set, for_element__del,
+                           doc=for_element__get.__doc__)
+
+HtmlLookup._elements['label'] = LabelElement
+
 ############################################################
 ## Serialization
 ############################################################
@@ -488,3 +1140,19 @@
     if not include_meta_content_type:
         html = __replace_meta_content_type('', html)
     return html
+
+def open_in_browser(doc):
+    """
+    Open the HTML document in a web browser (saving it to a temporary
+    file created with ``tempfile.mkstemp``, which is not deleted).
+    """
+    import os
+    import tempfile
+    import webbrowser
+    handle, fn = tempfile.mkstemp(suffix='.html')
+    f = os.fdopen(handle, 'wb')
+    f.write(tostring(doc, include_meta_content_type=True))
+    f.close()
+    url = 'file://' + fn.replace(os.path.sep, '/')
+    print url
+    webbrowser.open(url)

Added: lxml/branch/html/src/lxml/html/setmixin.py
==============================================================================
--- (empty file)
+++ lxml/branch/html/src/lxml/html/setmixin.py	Mon Jul 16 08:13:09 2007
@@ -0,0 +1,118 @@
+class SetMixin(object):
+
+    """
+    Mix-in for sets.  You must define __iter__, add, remove
+    (remove should raise KeyError for a missing item; discard relies on it).
+    """
+
+    def __len__(self):
+        return len(list(self))
+
+    def __contains__(self, item):
+        for has_item in self:
+            if item == has_item:
+                return True
+        return False
+
+    def issubset(self, other):
+        # True when every item of this set is also in other.
+        for item in self:
+            if item not in other:
+                return False
+        return True
+
+    __le__ = issubset
+
+    def issuperset(self, other):
+        # True when every item of other is also in this set.
+        for item in other:
+            if item not in self:
+                return False
+        return True
+
+    __ge__ = issuperset
+
+    def union(self, other):
+        return self | other
+
+    def __or__(self, other):
+        new = self.copy()
+        new |= other
+        return new
+
+    def intersection(self, other):
+        return self & other
+
+    def __and__(self, other):
+        new = self.copy()
+        new &= other
+        return new
+
+    def difference(self, other):
+        return self - other
+
+    def __sub__(self, other):
+        new = self.copy()
+        new -= other
+        return new
+
+    def symmetric_difference(self, other):
+        return self ^ other
+
+    def __xor__(self, other):
+        new = self.copy()
+        new ^= other
+        return new
+
+    def copy(self):
+        # Returns a plain built-in set holding the current items.
+        return set(self)
+
+    def update(self, other):
+        for item in other:
+            self.add(item)
+
+    def __ior__(self, other):
+        # In-place operators must return self, or ``s |= x`` rebinds s to None.
+        self.update(other)
+        return self
+
+    def intersection_update(self, other):
+        # Iterate over a snapshot, since items are removed while looping.
+        for item in list(self):
+            if item not in other:
+                self.remove(item)
+
+    def __iand__(self, other):
+        self.intersection_update(other)
+        return self
+
+    def difference_update(self, other):
+        for item in other:
+            if item in self:
+                self.remove(item)
+
+    def __isub__(self, other):
+        self.difference_update(other)
+        return self
+
+    def symmetric_difference_update(self, other):
+        for item in other:
+            if item in self:
+                self.remove(item)
+            else:
+                self.add(item)
+
+    def __ixor__(self, other):
+        self.symmetric_difference_update(other)
+        return self
+
+    def discard(self, item):
+        try:
+            self.remove(item)
+        except KeyError:
+            pass
+
+    def clear(self):
+        for item in list(self):
+            self.remove(item)

Added: lxml/branch/html/src/lxml/html/tests/test_forms.py
==============================================================================
--- (empty file)
+++ lxml/branch/html/src/lxml/html/tests/test_forms.py	Mon Jul 16 08:13:09 2007
@@ -0,0 +1,11 @@
+import unittest
+from lxml.tests.common_imports import doctest
+
+def test_suite():
+    """Return a suite running the ``test_forms.txt`` doctest file."""
+    suite = unittest.TestSuite()
+    suite.addTests([doctest.DocFileSuite('test_forms.txt')])
+    return suite
+
+if __name__ == '__main__':
+    unittest.main(defaultTest='test_suite')

Added: lxml/branch/html/src/lxml/html/tests/test_forms.txt
==============================================================================
--- (empty file)
+++ lxml/branch/html/src/lxml/html/tests/test_forms.txt	Mon Jul 16 08:13:09 2007
@@ -0,0 +1,123 @@
+>>> from lxml.html import usedoctest
+>>> from lxml.html import fromstring, tostring
+>>> h = fromstring('''<html><body>
+... <form action="test">
+...   <input type="hidden" name="hidden_field" value="hidden_value">
+...   <input type="text" name="text_field" value="text_value">
+...   <input type="checkbox" name="single_checkbox">
+...   <input type="checkbox" name="single_checkbox2" value="good">
+...   <input type="checkbox" name="check_group" value="1">
+...   <input type="checkbox" name="check_group" value="2" checked>
+...   <input type="checkbox" name="check_group" value="3" checked>
+...   <input type="checkbox" name="check_group" value="4">
+...   <textarea name="textarea_field">some text</textarea>
+...   <label for="value1">value 1</label>
+...     <input type="radio" name="radios" value="value1" id="value1">
+...   <label for="value2">value 2</label>
+...     <input type="radio" name="radios" value="value2" id="value2">
+...   <label for="value3">value 3</label>
+...     <input type="radio" name="radios" value="value3" id="value3" checked>
+...   <select name="select1">
+...     <option>No value</option>
+...     <option value="">Empty</option>
+...     <option value="1">number 1</option>
+...   </select>
+...   <select name="select2" multiple>
+...     <option value="1">number 1</option>
+...     <option value="2">number 2</option>
+...     <option value="3">number 3</option>
+...   </select>
+...   <input type="submit" name="submit1" value="submit">
+...   <input type="submit" name="submit2" value="submit">
+...   <input type="reset" name="reset1">linksys
+... </form>
+... </body></html>''')
+>>> f = h.forms[0]
+>>> f.action
+'test'
+>>> f.method
+'GET'
+>>> f.inputs
+<InputGetter for form 0>
+>>> hidden = f.inputs['hidden_field']
+>>> hidden.checkable
+False
+>>> hidden.value
+'hidden_value'
+>>> hidden.value = 'new value'
+>>> print tostring(hidden)
+<input type="hidden" name="hidden_field" value="new value">
+>>> checkbox = f.inputs['single_checkbox']
+>>> checkbox.checkable
+True
+>>> checkbox.type
+'checkbox'
+>>> checkbox.checked
+False
+>>> print checkbox.value
+None
+>>> checkbox.checked = True
+>>> checkbox.value
+'on'
+>>> print tostring(checkbox)
+<input type="checkbox" name="single_checkbox" checked>
+>>> checkbox2 = f.inputs['single_checkbox2']
+>>> checkbox2.checked = True
+>>> checkbox2.value
+'good'
+>>> group = f.inputs['check_group']
+>>> group.values
+<CheckboxValues {'2', '3'} for checkboxes name='check_group'>
+>>> group.values.add('1')
+>>> group.values
+<CheckboxValues {'1', '2', '3'} for checkboxes name='check_group'>
+>>> print tostring(group[0])
+<input type="checkbox" name="check_group" value="1" checked>
+>>> group.values.add('doesnotexist')
+Traceback (most recent call last):
+    ...
+KeyError: "No checkbox with value 'doesnotexist'"
+>>> textarea = f.inputs['textarea_field']
+>>> textarea.value
+'some text'
+>>> radios = f.inputs['radios']
+>>> radios[0].label.text
+'value 1'
+>>> radios.value
+'value3'
+>>> radios.value = 'value1'
+>>> radios.value
+'value1'
+>>> print tostring(radios[0])
+<input type="radio" name="radios" value="value1" id="value1" checked>
+>>> radios.value = None
+>>> print tostring(radios[0])
+<input type="radio" name="radios" value="value1" id="value1">
+>>> radios.value_options
+['value1', 'value2', 'value3']
+>>> select = f.inputs['select1']
+>>> print select.value
+None
+>>> select.value = ""
+>>> select.value
+''
+>>> select.value = 'asdf'
+Traceback (most recent call last):
+    ...
+ValueError: There is no option with the value of 'asdf'
+>>> select.value_options
+[None, '', '1']
+>>> select = f.inputs['select2']
+>>> select.value
+<MultipleSelectOptions {'1', '2', '3'} for select name='select2'>
+>>> select.value.update(['2', '3'])
+>>> select.value.remove('3')
+>>> select.value.add('asdf')
+Traceback (most recent call last):
+    ...
+ValueError: There is no option with the value 'asdf'
+>>> select.value_options
+['1', '2', '3']
+>>> import urllib
+>>> print urllib.urlencode(f.form_values())
+hidden_field=new+value&text_field=text_value&single_checkbox=on&single_checkbox2=good&check_group=1&check_group=2&check_group=3&textarea_field=some+text&select1=&select2=1&select2=2&select2=3


More information about the lxml-checkins mailing list