[Lxml-checkins] r44986 - in lxml/branch/html/src/lxml: . html html/tests

scoder at codespeak.net scoder at codespeak.net
Thu Jul 12 23:59:28 CEST 2007


Author: scoder
Date: Thu Jul 12 23:59:25 2007
New Revision: 44986

Modified:
   lxml/branch/html/src/lxml/doctestcompare.py
   lxml/branch/html/src/lxml/html/__init__.py
   lxml/branch/html/src/lxml/html/builder.py
   lxml/branch/html/src/lxml/html/diff.py
   lxml/branch/html/src/lxml/html/tests/test_basic.py
   lxml/branch/html/src/lxml/html/tests/test_basic.txt
   lxml/branch/html/src/lxml/html/tests/test_css.py
   lxml/branch/html/src/lxml/html/tests/test_css_select.txt
   lxml/branch/html/src/lxml/html/tests/test_rewritelinks.txt
Log:
renamed HTML() to document_fromstring(), HTMLFragment() to fragment_fromstring() and HTMLFragments() to fragments_fromstring()

Modified: lxml/branch/html/src/lxml/doctestcompare.py
==============================================================================
--- lxml/branch/html/src/lxml/doctestcompare.py	(original)
+++ lxml/branch/html/src/lxml/doctestcompare.py	Thu Jul 12 23:59:25 2007
@@ -26,7 +26,7 @@
 """
 
 from lxml import etree
-from lxml.html import HTML
+from lxml.html import document_fromstring
 import re
 import doctest
 import cgi
@@ -85,12 +85,12 @@
     def get_parser(self, want, got, optionflags):
         parser = None
         if PARSE_HTML & optionflags:
-            parser = HTML
+            parser = document_fromstring
         elif PARSE_XML & optionflags:
             parser = etree.XML
         elif (want.strip().lower().startswith('<html')
               and got.strip().startswith('<html')):
-            parser = HTML
+            parser = document_fromstring
         elif (self._looks_like_markup(want)
               and self._looks_like_markup(got)):
             parser = self.get_default_parser()
@@ -164,7 +164,7 @@
                 return '\n'.join(errors)
             else:
                 return value
-        html = parser is HTML
+        html = parser is document_fromstring
         diff_parts = []
         diff_parts.append('Expected:')
         diff_parts.append(self.format_doc(want_doc, html, 2))
@@ -325,7 +325,7 @@
 
 class LHTMLOutputChecker(LXMLOutputChecker):
     def get_default_parser(self):
-        return HTML
+        return document_fromstring
     
 def install(html=False):
     """

Modified: lxml/branch/html/src/lxml/html/__init__.py
==============================================================================
--- lxml/branch/html/src/lxml/html/__init__.py	(original)
+++ lxml/branch/html/src/lxml/html/__init__.py	Thu Jul 12 23:59:25 2007
@@ -6,7 +6,7 @@
 from lxml.html import defs
 from lxml import cssselect
 
-__all__ = ['HTML', 'tostring', 'Element', 'defs',
+__all__ = ['document_fromstring', 'tostring', 'Element', 'defs',
            'find_rel_links', 'find_class', 'make_links_absolute',
            'resolve_base_href', 'iterlinks', 'rewrite_links']
 
@@ -43,7 +43,7 @@
 
         Example::
 
-            >>> h = HTMLFragment('<div>Hello <b>World!</b></div>')
+            >>> h = fragment_fromstring('<div>Hello <b>World!</b></div>')
             >>> h.find('//b').drop_tag()
             >>> print tostring(h)
             <div>Hello World!</div>
@@ -292,7 +292,7 @@
     element=HtmlElement, comment=HtmlComment,
     pi=HtmlProcessingInstruction, entity=HtmlEntity))
 
-def HTML(html):
+def document_fromstring(html):
     # FIXME: should this notice a fragment and parse accordingly?
     value = etree.HTML(html, html_parser)
     if value is None:
@@ -300,7 +300,7 @@
             "Document is empty")
     return value
 
-def HTMLFragments(html, no_leading_text=False):
+def fragments_fromstring(html, no_leading_text=False):
     """
     Parses several HTML elements, returning a list of elements.
 
@@ -314,7 +314,7 @@
     if not start.startswith('<html') and not start.startswith('<!doctype'):
         # FIXME: That test doesn't work with a doctype or PI
         html = '<html><body>%s</body></html>' % html
-    doc = HTML(html)
+    doc = document_fromstring(html)
     assert doc.tag == 'html'
     bodies = [e for e in doc if e.tag == 'body']
     assert len(bodies) == 1, ("too many bodies: %r in %r" % (bodies, html))
@@ -330,7 +330,7 @@
     # would be nice
     return elements
 
-def HTMLFragment(html, create_parent=False):
+def fragment_fromstring(html, create_parent=False):
     """
     Parses a single HTML element; it is an error if there is more than
     one element, or if anything but whitespace precedes or follows the
@@ -342,8 +342,9 @@
     if create_parent:
         if not isinstance(create_parent, basestring):
             create_parent = 'div'
-        return HTMLFragment('<%s>%s</%s>' % (create_parent, html, create_parent))
-    elements = HTMLFragments(html, no_leading_text=True)
+        return fragment_fromstring('<%s>%s</%s>' % (
+            create_parent, html, create_parent))
+    elements = fragments_fromstring(html, no_leading_text=True)
     if not elements:
         raise etree.ParserError(
             "No elements found")
@@ -368,9 +369,9 @@
     start = html[:10].lstrip().lower()
     if start.startswith('<html') or start.startswith('<!doctype'):
         # Looks like a full HTML document
-        return HTML(html)
+        return document_fromstring(html)
     # otherwise, lets parse it out...
-    doc = HTML(html)
+    doc = document_fromstring(html)
     bodies = doc.findall('body')
     if bodies:
         body = bodies[0]

Modified: lxml/branch/html/src/lxml/html/builder.py
==============================================================================
--- lxml/branch/html/src/lxml/html/builder.py	(original)
+++ lxml/branch/html/src/lxml/html/builder.py	Thu Jul 12 23:59:25 2007
@@ -3,7 +3,7 @@
 
 Usage::
 
-    >>> from lxml.htmlbuilder import *
+    >>> from lxml.html.builder import *
     >>> html = HTML(
     ...            HEAD( TITLE("Hello World") ),
     ...            BODY( CLASS("main"),

Modified: lxml/branch/html/src/lxml/html/diff.py
==============================================================================
--- lxml/branch/html/src/lxml/html/diff.py	(original)
+++ lxml/branch/html/src/lxml/html/diff.py	Thu Jul 12 23:59:25 2007
@@ -1,6 +1,6 @@
 import difflib
 from lxml import etree
-from lxml.html import HTMLFragment
+from lxml.html import fragment_fromstring
 import cgi
 import re
 
@@ -531,7 +531,7 @@
     if cleanup:
         # This removes any extra markup or structure like <head>:
         html = cleanup_html(html)
-    return HTMLFragment(html, create_parent=True)
+    return fragment_fromstring(html, create_parent=True)
 
 _body_re = re.compile(r'<body.*?>', re.I|re.S)
 _end_body_re = re.compile(r'</body.*?>', re.I|re.S)

Modified: lxml/branch/html/src/lxml/html/tests/test_basic.py
==============================================================================
--- lxml/branch/html/src/lxml/html/tests/test_basic.py	(original)
+++ lxml/branch/html/src/lxml/html/tests/test_basic.py	Thu Jul 12 23:59:25 2007
@@ -1,8 +1,6 @@
 import unittest
 from lxml.tests.common_imports import doctest
 
-from lxml.html import HTML
-
 def test_suite():
     suite = unittest.TestSuite()
     suite.addTests([doctest.DocFileSuite('test_basic.txt')])

Modified: lxml/branch/html/src/lxml/html/tests/test_basic.txt
==============================================================================
--- lxml/branch/html/src/lxml/html/tests/test_basic.txt	(original)
+++ lxml/branch/html/src/lxml/html/tests/test_basic.txt	Thu Jul 12 23:59:25 2007
@@ -1,10 +1,10 @@
 lxml.html adds a find_class method to elements::
 
     >>> from lxml.etree import Comment
-    >>> from lxml.html import HTML, HTMLFragment, tostring
+    >>> from lxml.html import document_fromstring, fragment_fromstring, tostring
     >>> from lxml.html.clean import clean, clean_html
     >>> from lxml.html import usedoctest
-    >>> h = HTML('''
+    >>> h = document_fromstring('''
     ... <html><head></head>
     ... <body>
     ...   <a class="vcard
@@ -28,7 +28,7 @@
 Also added is a get_rel_links, which you can use to search for links
 like ``<a rel="$something">``::
 
-    >>> h = HTML('''
+    >>> h = document_fromstring('''
     ... <a href="1">test 1</a>
     ... <a href="2" rel="tag">item 2</a>
     ... <a href="3" rel="tagging">item 3</a>
@@ -40,7 +40,7 @@
 
 Another method is ``get_element_by_id`` that does what it says::
 
-    >>> print tostring(HTMLFragment('''
+    >>> print tostring(fragment_fromstring('''
     ... <div>
     ...  <span id="test">stuff</span>
     ... </div>''').get_element_by_id('test'))
@@ -48,14 +48,14 @@
 
 Or to get the content of an element without the tags, use text_content()::
 
-    >>> el = HTMLFragment('''
+    >>> el = fragment_fromstring('''
     ... <div>This is <a href="foo">a <b>bold</b> link</a></div>''')
     >>> el.text_content()
     'This is a bold link'
 
 Or drop an element (leaving its content) or the entire tree, like::
 
-    >>> doc = HTML('''
+    >>> doc = document_fromstring('''
     ... <html>
     ...  <body>
     ...   <div id="body">

Modified: lxml/branch/html/src/lxml/html/tests/test_css.py
==============================================================================
--- lxml/branch/html/src/lxml/html/tests/test_css.py	(original)
+++ lxml/branch/html/src/lxml/html/tests/test_css.py	Thu Jul 12 23:59:25 2007
@@ -70,7 +70,7 @@
         f = open(doc_fn, 'rb')
         c = f.read()
         f.close()
-        doc = html.HTML(c)
+        doc = html.document_fromstring(c)
         body = doc.xpath('//body')[0]
         bad = []
         selector, count = self.selectors[self.index]

Modified: lxml/branch/html/src/lxml/html/tests/test_css_select.txt
==============================================================================
--- lxml/branch/html/src/lxml/html/tests/test_css_select.txt	(original)
+++ lxml/branch/html/src/lxml/html/tests/test_css_select.txt	Thu Jul 12 23:59:25 2007
@@ -2,8 +2,8 @@
 all our selections, and a function make querying simpler:
 
     >>> from lxml.cssselect import CSSSelector
-    >>> from lxml.html import HTML
-    >>> doc = HTML('''
+    >>> from lxml.html import document_fromstring
+    >>> doc = document_fromstring('''
     ... <html><head></head><body>
     ... <div id="outer-div">
     ...  <a id="name-anchor" name="foo"></a>

Modified: lxml/branch/html/src/lxml/html/tests/test_rewritelinks.txt
==============================================================================
--- lxml/branch/html/src/lxml/html/tests/test_rewritelinks.txt	(original)
+++ lxml/branch/html/src/lxml/html/tests/test_rewritelinks.txt	Thu Jul 12 23:59:25 2007
@@ -75,7 +75,7 @@
 is something embedded).  It returns a generator of ``(element, attrib,
 link)``, which is awkward to test here, so we'll make a printer::
 
-    >>> from lxml.html import iterlinks, HTML, tostring
+    >>> from lxml.html import iterlinks, document_fromstring, tostring
     >>> def print_iter(seq):
     ...     for element, attrib, link, pos in seq:
     ...         if pos:


More information about the lxml-checkins mailing list