[Lxml-checkins] r50534 - in lxml/trunk: . src/lxml
scoder at codespeak.net
scoder at codespeak.net
Sat Jan 12 20:03:41 CET 2008
Author: scoder
Date: Sat Jan 12 20:03:41 2008
New Revision: 50534
Modified:
lxml/trunk/ (props changed)
lxml/trunk/src/lxml/doctestcompare.py
Log:
r3256 at delle: sbehnel | 2008-01-12 19:47:21 +0100
do not use recovering HTML parser in doctestcompare
Modified: lxml/trunk/src/lxml/doctestcompare.py
==============================================================================
--- lxml/trunk/src/lxml/doctestcompare.py (original)
+++ lxml/trunk/src/lxml/doctestcompare.py Sat Jan 12 20:03:41 2008
@@ -28,7 +28,6 @@
"""
from lxml import etree
-from lxml.html import document_fromstring
import re
import doctest
import cgi
@@ -51,6 +50,11 @@
def norm_whitespace(v):
return _norm_whitespace_re.sub(' ', v)
+_html_parser = etree.HTMLParser(recover=False)
+
+def html_fromstring(html):
+ return etree.fromstring(html, _html_parser)
+
# We use this to distinguish repr()s from elements:
_repr_re = re.compile(r'^<[^>]+ (at|object) ')
_norm_whitespace_re = re.compile(r'[ \t\n][ \t\n]+')
@@ -90,12 +94,12 @@
if NOPARSE_MARKUP & optionflags:
return None
if PARSE_HTML & optionflags:
- parser = document_fromstring
+ parser = html_fromstring
elif PARSE_XML & optionflags:
parser = etree.XML
elif (want.strip().lower().startswith('<html')
and got.strip().startswith('<html')):
- parser = document_fromstring
+ parser = html_fromstring
elif (self._looks_like_markup(want)
and self._looks_like_markup(got)):
parser = self.get_default_parser()
@@ -183,7 +187,7 @@
return '\n'.join(errors)
else:
return value
- html = parser is document_fromstring
+ html = parser is html_fromstring
diff_parts = []
diff_parts.append('Expected:')
diff_parts.append(self.format_doc(want_doc, html, 2))
@@ -344,7 +348,7 @@
class LHTMLOutputChecker(LXMLOutputChecker):
def get_default_parser(self):
- return document_fromstring
+ return html_fromstring
def install(html=False):
"""
More information about the lxml-checkins mailing list