[Lxml-checkins] r44986 - in lxml/branch/html/src/lxml: . html html/tests
scoder at codespeak.net
scoder at codespeak.net
Thu Jul 12 23:59:28 CEST 2007
Author: scoder
Date: Thu Jul 12 23:59:25 2007
New Revision: 44986
Modified:
lxml/branch/html/src/lxml/doctestcompare.py
lxml/branch/html/src/lxml/html/__init__.py
lxml/branch/html/src/lxml/html/builder.py
lxml/branch/html/src/lxml/html/diff.py
lxml/branch/html/src/lxml/html/tests/test_basic.py
lxml/branch/html/src/lxml/html/tests/test_basic.txt
lxml/branch/html/src/lxml/html/tests/test_css.py
lxml/branch/html/src/lxml/html/tests/test_css_select.txt
lxml/branch/html/src/lxml/html/tests/test_rewritelinks.txt
Log:
renamed HTML() to document_fromstring(), HTMLFragment() to fragment_fromstring() and HTMLFragments() to fragments_fromstring()
Modified: lxml/branch/html/src/lxml/doctestcompare.py
==============================================================================
--- lxml/branch/html/src/lxml/doctestcompare.py (original)
+++ lxml/branch/html/src/lxml/doctestcompare.py Thu Jul 12 23:59:25 2007
@@ -26,7 +26,7 @@
"""
from lxml import etree
-from lxml.html import HTML
+from lxml.html import document_fromstring
import re
import doctest
import cgi
@@ -85,12 +85,12 @@
def get_parser(self, want, got, optionflags):
parser = None
if PARSE_HTML & optionflags:
- parser = HTML
+ parser = document_fromstring
elif PARSE_XML & optionflags:
parser = etree.XML
elif (want.strip().lower().startswith('<html')
and got.strip().startswith('<html')):
- parser = HTML
+ parser = document_fromstring
elif (self._looks_like_markup(want)
and self._looks_like_markup(got)):
parser = self.get_default_parser()
@@ -164,7 +164,7 @@
return '\n'.join(errors)
else:
return value
- html = parser is HTML
+ html = parser is document_fromstring
diff_parts = []
diff_parts.append('Expected:')
diff_parts.append(self.format_doc(want_doc, html, 2))
@@ -325,7 +325,7 @@
class LHTMLOutputChecker(LXMLOutputChecker):
def get_default_parser(self):
- return HTML
+ return document_fromstring
def install(html=False):
"""
Modified: lxml/branch/html/src/lxml/html/__init__.py
==============================================================================
--- lxml/branch/html/src/lxml/html/__init__.py (original)
+++ lxml/branch/html/src/lxml/html/__init__.py Thu Jul 12 23:59:25 2007
@@ -6,7 +6,7 @@
from lxml.html import defs
from lxml import cssselect
-__all__ = ['HTML', 'tostring', 'Element', 'defs',
+__all__ = ['document_fromstring', 'tostring', 'Element', 'defs',
'find_rel_links', 'find_class', 'make_links_absolute',
'resolve_base_href', 'iterlinks', 'rewrite_links']
@@ -43,7 +43,7 @@
Example::
- >>> h = HTMLFragment('<div>Hello <b>World!</b></div>')
+ >>> h = fragment_fromstring('<div>Hello <b>World!</b></div>')
>>> h.find('//b').drop_tag()
>>> print tostring(h)
<div>Hello World!</div>
@@ -292,7 +292,7 @@
element=HtmlElement, comment=HtmlComment,
pi=HtmlProcessingInstruction, entity=HtmlEntity))
-def HTML(html):
+def document_fromstring(html):
# FIXME: should this notice a fragment and parse accordingly?
value = etree.HTML(html, html_parser)
if value is None:
@@ -300,7 +300,7 @@
"Document is empty")
return value
-def HTMLFragments(html, no_leading_text=False):
+def fragments_fromstring(html, no_leading_text=False):
"""
Parses several HTML elements, returning a list of elements.
@@ -314,7 +314,7 @@
if not start.startswith('<html') and not start.startswith('<!doctype'):
# FIXME: That test doesn't work with a doctype or PI
html = '<html><body>%s</body></html>' % html
- doc = HTML(html)
+ doc = document_fromstring(html)
assert doc.tag == 'html'
bodies = [e for e in doc if e.tag == 'body']
assert len(bodies) == 1, ("too many bodies: %r in %r" % (bodies, html))
@@ -330,7 +330,7 @@
# would be nice
return elements
-def HTMLFragment(html, create_parent=False):
+def fragment_fromstring(html, create_parent=False):
"""
Parses a single HTML element; it is an error if there is more than
one element, or if anything but whitespace precedes or follows the
@@ -342,8 +342,9 @@
if create_parent:
if not isinstance(create_parent, basestring):
create_parent = 'div'
- return HTMLFragment('<%s>%s</%s>' % (create_parent, html, create_parent))
- elements = HTMLFragments(html, no_leading_text=True)
+ return fragment_fromstring('<%s>%s</%s>' % (
+ create_parent, html, create_parent))
+ elements = fragments_fromstring(html, no_leading_text=True)
if not elements:
raise etree.ParserError(
"No elements found")
@@ -368,9 +369,9 @@
start = html[:10].lstrip().lower()
if start.startswith('<html') or start.startswith('<!doctype'):
# Looks like a full HTML document
- return HTML(html)
+ return document_fromstring(html)
# otherwise, lets parse it out...
- doc = HTML(html)
+ doc = document_fromstring(html)
bodies = doc.findall('body')
if bodies:
body = bodies[0]
Modified: lxml/branch/html/src/lxml/html/builder.py
==============================================================================
--- lxml/branch/html/src/lxml/html/builder.py (original)
+++ lxml/branch/html/src/lxml/html/builder.py Thu Jul 12 23:59:25 2007
@@ -3,7 +3,7 @@
Usage::
- >>> from lxml.htmlbuilder import *
+ >>> from lxml.html.builder import *
>>> html = HTML(
... HEAD( TITLE("Hello World") ),
... BODY( CLASS("main"),
Modified: lxml/branch/html/src/lxml/html/diff.py
==============================================================================
--- lxml/branch/html/src/lxml/html/diff.py (original)
+++ lxml/branch/html/src/lxml/html/diff.py Thu Jul 12 23:59:25 2007
@@ -1,6 +1,6 @@
import difflib
from lxml import etree
-from lxml.html import HTMLFragment
+from lxml.html import fragment_fromstring
import cgi
import re
@@ -531,7 +531,7 @@
if cleanup:
# This removes any extra markup or structure like <head>:
html = cleanup_html(html)
- return HTMLFragment(html, create_parent=True)
+ return fragment_fromstring(html, create_parent=True)
_body_re = re.compile(r'<body.*?>', re.I|re.S)
_end_body_re = re.compile(r'</body.*?>', re.I|re.S)
Modified: lxml/branch/html/src/lxml/html/tests/test_basic.py
==============================================================================
--- lxml/branch/html/src/lxml/html/tests/test_basic.py (original)
+++ lxml/branch/html/src/lxml/html/tests/test_basic.py Thu Jul 12 23:59:25 2007
@@ -1,8 +1,6 @@
import unittest
from lxml.tests.common_imports import doctest
-from lxml.html import HTML
-
def test_suite():
suite = unittest.TestSuite()
suite.addTests([doctest.DocFileSuite('test_basic.txt')])
Modified: lxml/branch/html/src/lxml/html/tests/test_basic.txt
==============================================================================
--- lxml/branch/html/src/lxml/html/tests/test_basic.txt (original)
+++ lxml/branch/html/src/lxml/html/tests/test_basic.txt Thu Jul 12 23:59:25 2007
@@ -1,10 +1,10 @@
lxml.html adds a find_class method to elements::
>>> from lxml.etree import Comment
- >>> from lxml.html import HTML, HTMLFragment, tostring
+ >>> from lxml.html import document_fromstring, fragment_fromstring, tostring
>>> from lxml.html.clean import clean, clean_html
>>> from lxml.html import usedoctest
- >>> h = HTML('''
+ >>> h = document_fromstring('''
... <html><head></head>
... <body>
... <a class="vcard
@@ -28,7 +28,7 @@
Also added is a get_rel_links, which you can use to search for links
like ``<a rel="$something">``::
- >>> h = HTML('''
+ >>> h = document_fromstring('''
... <a href="1">test 1</a>
... <a href="2" rel="tag">item 2</a>
... <a href="3" rel="tagging">item 3</a>
@@ -40,7 +40,7 @@
Another method is ``get_element_by_id`` that does what it says::
- >>> print tostring(HTMLFragment('''
+ >>> print tostring(fragment_fromstring('''
... <div>
... <span id="test">stuff</span>
... </div>''').get_element_by_id('test'))
@@ -48,14 +48,14 @@
Or to get the content of an element without the tags, use text_content()::
- >>> el = HTMLFragment('''
+ >>> el = fragment_fromstring('''
... <div>This is <a href="foo">a <b>bold</b> link</a></div>''')
>>> el.text_content()
'This is a bold link'
Or drop an element (leaving its content) or the entire tree, like::
- >>> doc = HTML('''
+ >>> doc = document_fromstring('''
... <html>
... <body>
... <div id="body">
Modified: lxml/branch/html/src/lxml/html/tests/test_css.py
==============================================================================
--- lxml/branch/html/src/lxml/html/tests/test_css.py (original)
+++ lxml/branch/html/src/lxml/html/tests/test_css.py Thu Jul 12 23:59:25 2007
@@ -70,7 +70,7 @@
f = open(doc_fn, 'rb')
c = f.read()
f.close()
- doc = html.HTML(c)
+ doc = html.document_fromstring(c)
body = doc.xpath('//body')[0]
bad = []
selector, count = self.selectors[self.index]
Modified: lxml/branch/html/src/lxml/html/tests/test_css_select.txt
==============================================================================
--- lxml/branch/html/src/lxml/html/tests/test_css_select.txt (original)
+++ lxml/branch/html/src/lxml/html/tests/test_css_select.txt Thu Jul 12 23:59:25 2007
@@ -2,8 +2,8 @@
all our selections, and a function make querying simpler:
>>> from lxml.cssselect import CSSSelector
- >>> from lxml.html import HTML
- >>> doc = HTML('''
+ >>> from lxml.html import document_fromstring
+ >>> doc = document_fromstring('''
... <html><head></head><body>
... <div id="outer-div">
... <a id="name-anchor" name="foo"></a>
Modified: lxml/branch/html/src/lxml/html/tests/test_rewritelinks.txt
==============================================================================
--- lxml/branch/html/src/lxml/html/tests/test_rewritelinks.txt (original)
+++ lxml/branch/html/src/lxml/html/tests/test_rewritelinks.txt Thu Jul 12 23:59:25 2007
@@ -75,7 +75,7 @@
is something embedded). It returns a generator of ``(element, attrib,
link)``, which is awkward to test here, so we'll make a printer::
- >>> from lxml.html import iterlinks, HTML, tostring
+ >>> from lxml.html import iterlinks, document_fromstring, tostring
>>> def print_iter(seq):
... for element, attrib, link, pos in seq:
... if pos:
More information about the lxml-checkins mailing list