[Lxml-checkins] r54365 - in lxml/trunk: . src/lxml/html src/lxml/html/tests

scoder at codespeak.net scoder at codespeak.net
Sat May 3 15:42:27 CEST 2008


Author: scoder
Date: Sat May  3 15:42:27 2008
New Revision: 54365

Added:
   lxml/trunk/src/lxml/html/tests/test_xhtml.py
   lxml/trunk/src/lxml/html/tests/test_xhtml.txt
Modified:
   lxml/trunk/   (props changed)
   lxml/trunk/CHANGES.txt
   lxml/trunk/src/lxml/html/__init__.py
   lxml/trunk/src/lxml/html/tests/test_basic.txt
Log:
 r4146 at delle:  sbehnel | 2008-05-03 15:40:45 +0200
 conversion functions HTML<->XHTML in lxml.html


Modified: lxml/trunk/CHANGES.txt
==============================================================================
--- lxml/trunk/CHANGES.txt	(original)
+++ lxml/trunk/CHANGES.txt	Sat May  3 15:42:27 2008
@@ -8,6 +8,9 @@
 Features added
 --------------
 
+* Conversion functions ``html_to_xhtml()`` and ``xhtml_to_html()`` in
+  lxml.html.
+
 * Most features in lxml.html work for XHTML namespaced tag names.
 
 Bugs fixed

Modified: lxml/trunk/src/lxml/html/__init__.py
==============================================================================
--- lxml/trunk/src/lxml/html/__init__.py	(original)
+++ lxml/trunk/src/lxml/html/__init__.py	Sat May  3 15:42:27 2008
@@ -1301,6 +1301,34 @@
 ## Serialization
 ############################################################
 
+def html_to_xhtml(html):
+    """Convert all tags in an HTML tree to XHTML by moving them to the
+    XHTML namespace.
+    """
+    try:
+        html = html.getroot()
+    except AttributeError:
+        pass
+    prefix = "{%s}" % XHTML_NAMESPACE
+    for el in html.iter():
+        tag = el.tag
+        if isinstance(tag, basestring):
+            if tag[0] != '{':
+                el.tag = prefix + tag
+
+def xhtml_to_html(xhtml):
+    """Convert all tags in an XHTML tree to HTML by removing their
+    XHTML namespace.
+    """
+    try:
+        xhtml = xhtml.getroot()
+    except AttributeError:
+        pass
+    prefix = "{%s}" % XHTML_NAMESPACE
+    prefix_len = len(prefix)
+    for el in xhtml.iter(prefix + "*"):
+        el.tag = el.tag[prefix_len:]
+
 # This isn't a general match, but it's a match for what libxml2
 # specifically serialises:
 __replace_meta_content_type = re.compile(

Modified: lxml/trunk/src/lxml/html/tests/test_basic.txt
==============================================================================
--- lxml/trunk/src/lxml/html/tests/test_basic.txt	(original)
+++ lxml/trunk/src/lxml/html/tests/test_basic.txt	Sat May  3 15:42:27 2008
@@ -96,16 +96,3 @@
       <div>footer</div>
      </body>
     </html>
-
-lxml.html has two parsers, one for HTML, one for XHTML:
-
-    >>> from lxml.html import HTMLParser, XHTMLParser
-    >>> html = "<html><body><p>Hi!</p></body></html>"
-
-    >>> root = document_fromstring(html, parser=HTMLParser())
-    >>> print root.tag
-    html
-
-    >>> root = document_fromstring(html, parser=XHTMLParser())
-    >>> print root.tag
-    html

Added: lxml/trunk/src/lxml/html/tests/test_xhtml.py
==============================================================================
--- (empty file)
+++ lxml/trunk/src/lxml/html/tests/test_xhtml.py	Sat May  3 15:42:27 2008
@@ -0,0 +1,11 @@
+import unittest, sys
+from lxml.tests.common_imports import doctest
+import lxml.html
+
+def test_suite():
+    suite = unittest.TestSuite()
+    suite.addTests([doctest.DocFileSuite('test_xhtml.txt')])
+    return suite
+
+if __name__ == '__main__':
+    unittest.main()

Added: lxml/trunk/src/lxml/html/tests/test_xhtml.txt
==============================================================================
--- (empty file)
+++ lxml/trunk/src/lxml/html/tests/test_xhtml.txt	Sat May  3 15:42:27 2008
@@ -0,0 +1,30 @@
+    >>> from lxml.html import document_fromstring, fragment_fromstring, tostring
+
+lxml.html has two parsers, one for HTML, one for XHTML:
+
+    >>> from lxml.html import HTMLParser, XHTMLParser
+    >>> html = "<html><body><p>Hi!</p></body></html>"
+
+    >>> root = document_fromstring(html, parser=HTMLParser())
+    >>> print root.tag
+    html
+
+    >>> root = document_fromstring(html, parser=XHTMLParser())
+    >>> print root.tag
+    html
+
+There are two functions for converting between HTML and XHTML:
+
+    >>> from lxml.html import xhtml_to_html, html_to_xhtml
+
+    >>> doc = document_fromstring(html, parser=HTMLParser())
+    >>> print tostring(doc)
+    <html><body><p>Hi!</p></body></html>
+
+    >>> html_to_xhtml(doc)
+    >>> print tostring(doc)
+    <html:html xmlns:html="http://www.w3.org/1999/xhtml"><html:body><html:p>Hi!</html:p></html:body></html:html>
+
+    >>> xhtml_to_html(doc)
+    >>> print tostring(doc)
+    <html xmlns:html="http://www.w3.org/1999/xhtml"><body><p>Hi!</p></body></html>


More information about the lxml-checkins mailing list