[Lxml-checkins] r50534 - in lxml/trunk: . src/lxml

scoder at codespeak.net
Sat Jan 12 20:03:41 CET 2008


Author: scoder
Date: Sat Jan 12 20:03:41 2008
New Revision: 50534

Modified:
   lxml/trunk/   (props changed)
   lxml/trunk/src/lxml/doctestcompare.py
Log:
 r3256 at delle:  sbehnel | 2008-01-12 19:47:21 +0100
 do not use recovering HTML parser in doctestcompare


Modified: lxml/trunk/src/lxml/doctestcompare.py
==============================================================================
--- lxml/trunk/src/lxml/doctestcompare.py	(original)
+++ lxml/trunk/src/lxml/doctestcompare.py	Sat Jan 12 20:03:41 2008
@@ -28,7 +28,6 @@
 """
 
 from lxml import etree
-from lxml.html import document_fromstring
 import re
 import doctest
 import cgi
@@ -51,6 +50,11 @@
 def norm_whitespace(v):
     return _norm_whitespace_re.sub(' ', v)
 
+_html_parser = etree.HTMLParser(recover=False)
+
+def html_fromstring(html):
+    return etree.fromstring(html, _html_parser)
+
 # We use this to distinguish repr()s from elements:
 _repr_re = re.compile(r'^<[^>]+ (at|object) ')
 _norm_whitespace_re = re.compile(r'[ \t\n][ \t\n]+')
@@ -90,12 +94,12 @@
         if NOPARSE_MARKUP & optionflags:
             return None
         if PARSE_HTML & optionflags:
-            parser = document_fromstring
+            parser = html_fromstring
         elif PARSE_XML & optionflags:
             parser = etree.XML
         elif (want.strip().lower().startswith('<html')
               and got.strip().startswith('<html')):
-            parser = document_fromstring
+            parser = html_fromstring
         elif (self._looks_like_markup(want)
               and self._looks_like_markup(got)):
             parser = self.get_default_parser()
@@ -183,7 +187,7 @@
                 return '\n'.join(errors)
             else:
                 return value
-        html = parser is document_fromstring
+        html = parser is html_fromstring
         diff_parts = []
         diff_parts.append('Expected:')
         diff_parts.append(self.format_doc(want_doc, html, 2))
@@ -344,7 +348,7 @@
 
 class LHTMLOutputChecker(LXMLOutputChecker):
     def get_default_parser(self):
-        return document_fromstring
+        return html_fromstring
     
 def install(html=False):
     """


More information about the lxml-checkins mailing list