[Lxml-checkins] r44882 - in lxml/branch/html/src/lxml/html: . tests

scoder at codespeak.net scoder at codespeak.net
Mon Jul 9 21:27:27 CEST 2007


Author: scoder
Date: Mon Jul  9 21:27:25 2007
New Revision: 44882

Modified:
   lxml/branch/html/src/lxml/html/__init__.py
   lxml/branch/html/src/lxml/html/clean.py
   lxml/branch/html/src/lxml/html/diff.py
   lxml/branch/html/src/lxml/html/formfill.py
   lxml/branch/html/src/lxml/html/tests/test_basic.txt
   lxml/branch/html/src/lxml/html/tests/test_clean.txt
   lxml/branch/html/src/lxml/html/tests/test_feedparser_data.py
   lxml/branch/html/src/lxml/html/tests/test_rewritelinks.txt
Log:
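renamed: parse_element -> HTMLFragment, parse_elements -> HTMLFragments, parse -> fromstring
(note: the diff below also contains non-rename changes: in __init__.py, _contains_block_level_tag() is now passed `body` instead of `el`; in test_basic.txt, the get_element_by_id doctest now uses HTMLFragment instead of HTML; and the parse_element/HTML imports are removed from test_feedparser_data.py and test_rewritelinks.txt)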

Modified: lxml/branch/html/src/lxml/html/__init__.py
==============================================================================
--- lxml/branch/html/src/lxml/html/__init__.py	(original)
+++ lxml/branch/html/src/lxml/html/__init__.py	Mon Jul  9 21:27:25 2007
@@ -43,7 +43,7 @@
 
         Example::
 
-            >>> h = parse_element('<div>Hello <b>World!</b></div>')
+            >>> h = HTMLFragment('<div>Hello <b>World!</b></div>')
             >>> h.find('//b').drop_tag()
             >>> print tostring(h)
             <div>Hello World!</div>
@@ -248,7 +248,7 @@
                 raise TypeError(
                     "The keyword 'copy' can only be used with element inputs to %s, not a string input" % self.name)
             return_string = True
-            doc = parse(doc)
+            doc = fromstring(doc)
         else:
             if 'copy' in kw:
                 copy = kw.pop('copy')
@@ -300,7 +300,7 @@
             "Document is empty")
     return value
 
-def parse_elements(html, no_leading_text=False):
+def HTMLFragments(html, no_leading_text=False):
     """
     Parses several HTML elements, returning a list of elements.
 
@@ -330,7 +330,7 @@
     # would be nice
     return elements
 
-def parse_element(html, create_parent=False):
+def HTMLFragment(html, create_parent=False):
     """
     Parses a single HTML element; it is an error if there is more than
     one element, or if anything but whitespace precedes or follows the
@@ -342,8 +342,8 @@
     if create_parent:
         if not isinstance(create_parent, basestring):
             create_parent = 'div'
-        return parse_element('<%s>%s</%s>' % (create_parent, html, create_parent))
-    elements = parse_elements(html, no_leading_text=True)
+        return HTMLFragment('<%s>%s</%s>' % (create_parent, html, create_parent))
+    elements = HTMLFragments(html, no_leading_text=True)
     if not elements:
         raise etree.ParserError(
             "No elements found")
@@ -358,7 +358,7 @@
     el.tail = None
     return el
 
-def parse(html):
+def fromstring(html):
     """
     Parse the html, returning a single element/document.
 
@@ -407,7 +407,7 @@
     # Now we have a body which represents a bunch of tags which have the
     # content that was passed in.  We will create a fake container, which
     # is the body tag, except <body> implies too much structure.
-    if _contains_block_level_tag(el):
+    if _contains_block_level_tag(body):
         body.tag = 'div'
     else:
         body.tag = 'span'

Modified: lxml/branch/html/src/lxml/html/clean.py
==============================================================================
--- lxml/branch/html/src/lxml/html/clean.py	(original)
+++ lxml/branch/html/src/lxml/html/clean.py	Mon Jul  9 21:27:25 2007
@@ -1,7 +1,7 @@
 import re
 from lxml import etree
 from lxml.html import defs
-from lxml.html import parse, tostring
+from lxml.html import fromstring, tostring
 
 try:
     set
@@ -60,7 +60,7 @@
     Like clean(), but takes a text input document, and returns a text
     document.
     """
-    doc = parse(html)
+    doc = fromstring(html)
     clean(doc, **kw)
     return tostring(doc)
 
@@ -352,7 +352,7 @@
     def clean_html(self, html):
         if isinstance(html, basestring):
             return_string = True
-            doc = parse(html)
+            doc = fromstring(html)
         else:
             return_string = False
             doc = copy.deepcopy(doc)
@@ -490,7 +490,7 @@
                 
 def autolink_html(html, *args, **kw):
     if isinstance(html, basestring):
-        doc = parse(html)
+        doc = fromstring(html)
         return_string = True
     else:
         doc = copy.deepcopy(html)
@@ -552,7 +552,7 @@
             child.tail = _break_text(child.tail, max_width, break_character)
 
 def word_break_html(html, *args, **kw):
-    doc = parse(html)
+    doc = fromstring(html)
     word_break(doc, *args, **kw)
     return tostring(doc)
 

Modified: lxml/branch/html/src/lxml/html/diff.py
==============================================================================
--- lxml/branch/html/src/lxml/html/diff.py	(original)
+++ lxml/branch/html/src/lxml/html/diff.py	Mon Jul  9 21:27:25 2007
@@ -1,6 +1,6 @@
 import difflib
 from lxml import etree
-from lxml.html import parse_element
+from lxml.html import HTMLFragment
 import cgi
 import re
 
@@ -531,7 +531,7 @@
     if cleanup:
         # This removes any extra markup or structure like <head>:
         html = cleanup_html(html)
-    return parse_element(html, create_parent=True)
+    return HTMLFragment(html, create_parent=True)
 
 _body_re = re.compile(r'<body.*?>', re.I|re.S)
 _end_body_re = re.compile(r'</body.*?>', re.I|re.S)

Modified: lxml/branch/html/src/lxml/html/formfill.py
==============================================================================
--- lxml/branch/html/src/lxml/html/formfill.py	(original)
+++ lxml/branch/html/src/lxml/html/formfill.py	Mon Jul  9 21:27:25 2007
@@ -1,5 +1,5 @@
 from lxml.etree import XPath, ElementBase
-from lxml.html import parse, tostring
+from lxml.html import fromstring, tostring
 from lxml.html import defs
 
 __all__ = ['FormNotFound', 'fill_form', 'fill_form_html',
@@ -27,7 +27,7 @@
 
 def fill_form_html(html, values, form_id=None, form_index=None):
     if isinstance(html, basestring):
-        doc = parse(html)
+        doc = fromstring(html)
         return_string = True
     else:
         doc = copy.deepcopy(html)
@@ -242,7 +242,7 @@
 
 def insert_errors_html(html, values, **kw):
     if isinstance(html, basestring):
-        doc = parse(html)
+        doc = fromstring(html)
         return_string = True
     else:
         doc = copy.deepcopy(html)

Modified: lxml/branch/html/src/lxml/html/tests/test_basic.txt
==============================================================================
--- lxml/branch/html/src/lxml/html/tests/test_basic.txt	(original)
+++ lxml/branch/html/src/lxml/html/tests/test_basic.txt	Mon Jul  9 21:27:25 2007
@@ -1,7 +1,7 @@
 lxml.html adds a find_class method to elements::
 
     >>> from lxml.etree import Comment
-    >>> from lxml.html import HTML, tostring, parse_element
+    >>> from lxml.html import HTML, HTMLFragment, tostring
     >>> from lxml.html.clean import clean, clean_html
     >>> from lxml.html import usedoctest
     >>> h = HTML('''
@@ -40,7 +40,7 @@
 
 Another method is ``get_element_by_id`` that does what it says::
 
-    >>> print tostring(HTML('''
+    >>> print tostring(HTMLFragment('''
     ... <div>
     ...  <span id="test">stuff</span>
     ... </div>''').get_element_by_id('test'))
@@ -48,7 +48,7 @@
 
 Or to get the content of an element without the tags, use text_content()::
 
-    >>> el = parse_element('''
+    >>> el = HTMLFragment('''
     ... <div>This is <a href="foo">a <b>bold</b> link</a></div>''')
     >>> el.text_content()
     'This is a bold link'

Modified: lxml/branch/html/src/lxml/html/tests/test_clean.txt
==============================================================================
--- lxml/branch/html/src/lxml/html/tests/test_clean.txt	(original)
+++ lxml/branch/html/src/lxml/html/tests/test_clean.txt	Mon Jul  9 21:27:25 2007
@@ -1,4 +1,4 @@
->>> from lxml.html import parse, tostring
+>>> from lxml.html import fromstring, tostring
 >>> from lxml.html.clean import clean, clean_html, Cleaner
 >>> from lxml.html import usedoctest
 
@@ -55,7 +55,7 @@
   </body>
 </html>
 
->>> print tostring(parse(doc))
+>>> print tostring(fromstring(doc))
 <html>
   <head>
     <script type="text/javascript" src="evil-site"></script>

Modified: lxml/branch/html/src/lxml/html/tests/test_feedparser_data.py
==============================================================================
--- lxml/branch/html/src/lxml/html/tests/test_feedparser_data.py	(original)
+++ lxml/branch/html/src/lxml/html/tests/test_feedparser_data.py	Mon Jul  9 21:27:25 2007
@@ -5,7 +5,6 @@
 from lxml.tests.common_imports import doctest
 from lxml.doctestcompare import LHTMLOutputChecker
 
-from lxml.html import HTML, parse_element
 from lxml.html.clean import clean, Cleaner
 
 feed_dirs = [

Modified: lxml/branch/html/src/lxml/html/tests/test_rewritelinks.txt
==============================================================================
--- lxml/branch/html/src/lxml/html/tests/test_rewritelinks.txt	(original)
+++ lxml/branch/html/src/lxml/html/tests/test_rewritelinks.txt	Mon Jul  9 21:27:25 2007
@@ -14,7 +14,7 @@
 
 Some basics::
 
-    >>> from lxml.html import usedoctest, parse_element, tostring
+    >>> from lxml.html import usedoctest, tostring
     >>> from lxml.html import rewrite_links
     >>> print rewrite_links(
     ...     '<a href="http://old/blah/blah.html">link</a>', relocate_href)


More information about the lxml-checkins mailing list