[Lxml-checkins] r44882 - in lxml/branch/html/src/lxml/html: . tests
scoder at codespeak.net
scoder at codespeak.net
Mon Jul 9 21:27:27 CEST 2007
Author: scoder
Date: Mon Jul 9 21:27:25 2007
New Revision: 44882
Modified:
lxml/branch/html/src/lxml/html/__init__.py
lxml/branch/html/src/lxml/html/clean.py
lxml/branch/html/src/lxml/html/diff.py
lxml/branch/html/src/lxml/html/formfill.py
lxml/branch/html/src/lxml/html/tests/test_basic.txt
lxml/branch/html/src/lxml/html/tests/test_clean.txt
lxml/branch/html/src/lxml/html/tests/test_feedparser_data.py
lxml/branch/html/src/lxml/html/tests/test_rewritelinks.txt
Log:
renamed: parse_element -> HTMLFragment, parse_elements -> HTMLFragments, parse -> fromstring
Modified: lxml/branch/html/src/lxml/html/__init__.py
==============================================================================
--- lxml/branch/html/src/lxml/html/__init__.py (original)
+++ lxml/branch/html/src/lxml/html/__init__.py Mon Jul 9 21:27:25 2007
@@ -43,7 +43,7 @@
Example::
- >>> h = parse_element('<div>Hello <b>World!</b></div>')
+ >>> h = HTMLFragment('<div>Hello <b>World!</b></div>')
>>> h.find('//b').drop_tag()
>>> print tostring(h)
<div>Hello World!</div>
@@ -248,7 +248,7 @@
raise TypeError(
"The keyword 'copy' can only be used with element inputs to %s, not a string input" % self.name)
return_string = True
- doc = parse(doc)
+ doc = fromstring(doc)
else:
if 'copy' in kw:
copy = kw.pop('copy')
@@ -300,7 +300,7 @@
"Document is empty")
return value
-def parse_elements(html, no_leading_text=False):
+def HTMLFragments(html, no_leading_text=False):
"""
Parses several HTML elements, returning a list of elements.
@@ -330,7 +330,7 @@
# would be nice
return elements
-def parse_element(html, create_parent=False):
+def HTMLFragment(html, create_parent=False):
"""
Parses a single HTML element; it is an error if there is more than
one element, or if anything but whitespace precedes or follows the
@@ -342,8 +342,8 @@
if create_parent:
if not isinstance(create_parent, basestring):
create_parent = 'div'
- return parse_element('<%s>%s</%s>' % (create_parent, html, create_parent))
- elements = parse_elements(html, no_leading_text=True)
+ return HTMLFragment('<%s>%s</%s>' % (create_parent, html, create_parent))
+ elements = HTMLFragments(html, no_leading_text=True)
if not elements:
raise etree.ParserError(
"No elements found")
@@ -358,7 +358,7 @@
el.tail = None
return el
-def parse(html):
+def fromstring(html):
"""
Parse the html, returning a single element/document.
@@ -407,7 +407,7 @@
# Now we have a body which represents a bunch of tags which have the
# content that was passed in. We will create a fake container, which
# is the body tag, except <body> implies too much structure.
- if _contains_block_level_tag(el):
+ if _contains_block_level_tag(body):
body.tag = 'div'
else:
body.tag = 'span'
Modified: lxml/branch/html/src/lxml/html/clean.py
==============================================================================
--- lxml/branch/html/src/lxml/html/clean.py (original)
+++ lxml/branch/html/src/lxml/html/clean.py Mon Jul 9 21:27:25 2007
@@ -1,7 +1,7 @@
import re
from lxml import etree
from lxml.html import defs
-from lxml.html import parse, tostring
+from lxml.html import fromstring, tostring
try:
set
@@ -60,7 +60,7 @@
Like clean(), but takes a text input document, and returns a text
document.
"""
- doc = parse(html)
+ doc = fromstring(html)
clean(doc, **kw)
return tostring(doc)
@@ -352,7 +352,7 @@
def clean_html(self, html):
if isinstance(html, basestring):
return_string = True
- doc = parse(html)
+ doc = fromstring(html)
else:
return_string = False
doc = copy.deepcopy(doc)
@@ -490,7 +490,7 @@
def autolink_html(html, *args, **kw):
if isinstance(html, basestring):
- doc = parse(html)
+ doc = fromstring(html)
return_string = True
else:
doc = copy.deepcopy(html)
@@ -552,7 +552,7 @@
child.tail = _break_text(child.tail, max_width, break_character)
def word_break_html(html, *args, **kw):
- doc = parse(html)
+ doc = fromstring(html)
word_break(doc, *args, **kw)
return tostring(doc)
Modified: lxml/branch/html/src/lxml/html/diff.py
==============================================================================
--- lxml/branch/html/src/lxml/html/diff.py (original)
+++ lxml/branch/html/src/lxml/html/diff.py Mon Jul 9 21:27:25 2007
@@ -1,6 +1,6 @@
import difflib
from lxml import etree
-from lxml.html import parse_element
+from lxml.html import HTMLFragment
import cgi
import re
@@ -531,7 +531,7 @@
if cleanup:
# This removes any extra markup or structure like <head>:
html = cleanup_html(html)
- return parse_element(html, create_parent=True)
+ return HTMLFragment(html, create_parent=True)
_body_re = re.compile(r'<body.*?>', re.I|re.S)
_end_body_re = re.compile(r'</body.*?>', re.I|re.S)
Modified: lxml/branch/html/src/lxml/html/formfill.py
==============================================================================
--- lxml/branch/html/src/lxml/html/formfill.py (original)
+++ lxml/branch/html/src/lxml/html/formfill.py Mon Jul 9 21:27:25 2007
@@ -1,5 +1,5 @@
from lxml.etree import XPath, ElementBase
-from lxml.html import parse, tostring
+from lxml.html import fromstring, tostring
from lxml.html import defs
__all__ = ['FormNotFound', 'fill_form', 'fill_form_html',
@@ -27,7 +27,7 @@
def fill_form_html(html, values, form_id=None, form_index=None):
if isinstance(html, basestring):
- doc = parse(html)
+ doc = fromstring(html)
return_string = True
else:
doc = copy.deepcopy(html)
@@ -242,7 +242,7 @@
def insert_errors_html(html, values, **kw):
if isinstance(html, basestring):
- doc = parse(html)
+ doc = fromstring(html)
return_string = True
else:
doc = copy.deepcopy(html)
Modified: lxml/branch/html/src/lxml/html/tests/test_basic.txt
==============================================================================
--- lxml/branch/html/src/lxml/html/tests/test_basic.txt (original)
+++ lxml/branch/html/src/lxml/html/tests/test_basic.txt Mon Jul 9 21:27:25 2007
@@ -1,7 +1,7 @@
lxml.html adds a find_class method to elements::
>>> from lxml.etree import Comment
- >>> from lxml.html import HTML, tostring, parse_element
+ >>> from lxml.html import HTML, HTMLFragment, tostring
>>> from lxml.html.clean import clean, clean_html
>>> from lxml.html import usedoctest
>>> h = HTML('''
@@ -40,7 +40,7 @@
Another method is ``get_element_by_id`` that does what it says::
- >>> print tostring(HTML('''
+ >>> print tostring(HTMLFragment('''
... <div>
... <span id="test">stuff</span>
... </div>''').get_element_by_id('test'))
@@ -48,7 +48,7 @@
Or to get the content of an element without the tags, use text_content()::
- >>> el = parse_element('''
+ >>> el = HTMLFragment('''
... <div>This is <a href="foo">a <b>bold</b> link</a></div>''')
>>> el.text_content()
'This is a bold link'
Modified: lxml/branch/html/src/lxml/html/tests/test_clean.txt
==============================================================================
--- lxml/branch/html/src/lxml/html/tests/test_clean.txt (original)
+++ lxml/branch/html/src/lxml/html/tests/test_clean.txt Mon Jul 9 21:27:25 2007
@@ -1,4 +1,4 @@
->>> from lxml.html import parse, tostring
+>>> from lxml.html import fromstring, tostring
>>> from lxml.html.clean import clean, clean_html, Cleaner
>>> from lxml.html import usedoctest
@@ -55,7 +55,7 @@
</body>
</html>
->>> print tostring(parse(doc))
+>>> print tostring(fromstring(doc))
<html>
<head>
<script type="text/javascript" src="evil-site"></script>
Modified: lxml/branch/html/src/lxml/html/tests/test_feedparser_data.py
==============================================================================
--- lxml/branch/html/src/lxml/html/tests/test_feedparser_data.py (original)
+++ lxml/branch/html/src/lxml/html/tests/test_feedparser_data.py Mon Jul 9 21:27:25 2007
@@ -5,7 +5,6 @@
from lxml.tests.common_imports import doctest
from lxml.doctestcompare import LHTMLOutputChecker
-from lxml.html import HTML, parse_element
from lxml.html.clean import clean, Cleaner
feed_dirs = [
Modified: lxml/branch/html/src/lxml/html/tests/test_rewritelinks.txt
==============================================================================
--- lxml/branch/html/src/lxml/html/tests/test_rewritelinks.txt (original)
+++ lxml/branch/html/src/lxml/html/tests/test_rewritelinks.txt Mon Jul 9 21:27:25 2007
@@ -14,7 +14,7 @@
Some basics::
- >>> from lxml.html import usedoctest, parse_element, tostring
+ >>> from lxml.html import usedoctest, tostring
>>> from lxml.html import rewrite_links
>>> print rewrite_links(
... '<a href="http://old/blah/blah.html">link</a>', relocate_href)
More information about the lxml-checkins mailing list