[Lxml-checkins] r45167 - in lxml/branch/html/src/lxml/html: . tests

ianb at codespeak.net ianb at codespeak.net
Tue Jul 17 20:56:35 CEST 2007


Author: ianb
Date: Tue Jul 17 20:56:34 2007
New Revision: 45167

Modified:
   lxml/branch/html/src/lxml/html/__init__.py
   lxml/branch/html/src/lxml/html/tests/test_forms.txt
Log:
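Fix base_url -- both the property itself (it now calls getroottree() instead of the misspelled gettreeroot()) and the *_html versions of the functions, which now pass their keyword arguments (such as base_url) through to the parser.  Added form.fields, which is a dictionary-like object you can use to set values directly.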

Modified: lxml/branch/html/src/lxml/html/__init__.py
==============================================================================
--- lxml/branch/html/src/lxml/html/__init__.py	(original)
+++ lxml/branch/html/src/lxml/html/__init__.py	Tue Jul 17 20:56:34 2007
@@ -6,6 +6,12 @@
 from lxml.html import defs
 from lxml import cssselect
 from lxml.html.setmixin import SetMixin
+try:
+    from UserDict import DictMixin
+except ImportError:
+    # DictMixin was introduced in Python 2.4
+    from lxml.html._dictmixin import DictMixin
+import sets
 
 __all__ = ['document_fromstring', 'tostring', 'Element', 'defs',
            'find_rel_links', 'find_class', 'make_links_absolute',
@@ -29,7 +35,7 @@
         Use with ``urlparse.urljoin(el.base_url, href)`` to get
         absolute URLs.
         """
-        return self.gettreeroot().docinfo.URL
+        return self.getroottree().docinfo.URL
     base_url = property(base_url, doc=base_url.__doc__)
 
     def forms(self):
@@ -311,7 +317,7 @@
                 raise TypeError(
                     "The keyword 'copy' can only be used with element inputs to %s, not a string input" % self.name)
             return_string = True
-            doc = fromstring(doc)
+            doc = fromstring(doc, **kw)
         else:
             if 'copy' in kw:
                 copy = kw.pop('copy')
@@ -372,7 +378,7 @@
 html_parser.setElementClassLookup(HtmlLookup())
 
 def document_fromstring(html, **kw):
-    value = etree.HTML(html, html_parser)
+    value = etree.HTML(html, html_parser, **kw)
     if value is None:
         raise etree.ParserError(
             "Document is empty")
@@ -534,6 +540,27 @@
         return InputGetter(self)
     inputs = property(inputs, doc=inputs.__doc__)
 
+    def fields__get(self):
+        """
+        Dictionary-like object that represents all the fields in this
+        form.  You can set values in this dictionary to effect the
+        form.
+        """
+        return FieldsDict(self.inputs)
+    def fields__set(self, value):
+        prev_keys = self.fields.keys()
+        for key, value in value.iteritems():
+            if key in prev_keys:
+                prev_keys.remove(key)
+            self.fields[key] = value
+        for key in prev_keys:
+            # FIXME: but right now I don't even allow
+            # deleting, and I'm not sure what it would
+            # mean if I did.
+            del self.fields[key]
+
+    fields = property(fields__get, fields__set, doc=fields__get.__doc__)
+
     def _name(self):
         if self.get('name'):
             return self.get('name')
@@ -634,6 +661,27 @@
 
 HtmlLookup._elements['form'] = FormElement
 
+class FieldsDict(DictMixin):
+
+    def __init__(self, inputs):
+        self.inputs = inputs
+    def __getitem__(self, item):
+        return self.inputs[item].value
+    def __setitem__(self, item, value):
+        self.inputs[item].value = value
+    def __delitem__(self, item):
+        raise KeyError(
+            "You cannot remove keys from ElementDict")
+    def keys(self):
+        return self.inputs.keys()
+    def __contains__(self, item):
+        return item in self.inputs
+
+    def __repr__(self):
+        return '<%s for form %s>' % (
+            self.__class__.__name__,
+            self.inputs.form._name())
+
 class InputGetter(object):
 
     """
@@ -683,6 +731,16 @@
             raise KeyError(
                 "No input element with the name %r" % name)
 
+    def __contains__(self, name):
+        results = self._name_xpath(self.form, name=name)
+        return bool(results)
+
+    def keys(self):
+        names = sets.Set()
+        for el in self:
+            names.add(el.name)
+        return list(names)
+
     def __iter__(self):
         ## FIXME: kind of dumb to turn a list into an iterator, only
         ## to have it likely turned back into a list again :(

Modified: lxml/branch/html/src/lxml/html/tests/test_forms.txt
==============================================================================
--- lxml/branch/html/src/lxml/html/tests/test_forms.txt	(original)
+++ lxml/branch/html/src/lxml/html/tests/test_forms.txt	Tue Jul 17 20:56:34 2007
@@ -31,10 +31,12 @@
 ...   <input type="submit" name="submit2" value="submit">
 ...   <input type="reset" name="reset1">linksys
 ... </form>
-... </body></html>''')
+... </body></html>''', base_url='http://example.org/form.html')
+>>> h.base_url
+'http://example.org/form.html'
 >>> f = h.forms[0]
 >>> f.action
-'test'
+'http://example.org/test'
 >>> f.method
 'GET'
 >>> f.inputs
@@ -66,14 +68,14 @@
 >>> checkbox2.value
 'good'
 >>> group = f.inputs['check_group']
->>> group.values
+>>> group.value
 <CheckboxValues {'2', '3'} for checkboxes name='check_group'>
->>> group.values.add('1')
->>> group.values
+>>> group.value.add('1')
+>>> group.value
 <CheckboxValues {'1', '2', '3'} for checkboxes name='check_group'>
 >>> print tostring(group[0])
 <input type="checkbox" name="check_group" value="1" checked>
->>> group.values.add('doesnotexist')
+>>> group.value.add('doesnotexist')
 Traceback (most recent call last):
     ...
 KeyError: "No checkbox with value 'doesnotexist'"
@@ -121,3 +123,21 @@
 >>> import urllib
 >>> print urllib.urlencode(f.form_values())
 hidden_field=new+value&text_field=text_value&single_checkbox=on&single_checkbox2=good&check_group=1&check_group=2&check_group=3&textarea_field=some+text&select1=&select2=1&select2=2&select2=3
+>>> fields = f.fields
+>>> fields
+<FieldsDict for form 0>
+>>> for name, value in fields.items():
+...     print '%s: %r' % (name, value)
+textarea_field: 'some text'
+radios: None
+submit2: 'submit'
+submit1: 'submit'
+select1: ''
+hidden_field: 'new value'
+text_field: 'text_value'
+reset1: None
+single_checkbox: 'on'
+select2: <MultipleSelectOptions {'1', '2', '3'} for select name='select2'>
+single_checkbox2: 'good'
+check_group: <CheckboxValues {'1', '2', '3'} for checkboxes name='check_group'>
+


More information about the lxml-checkins mailing list