[Lxml-checkins] r44845 - in lxml/branch/lxml-1.3: . src/lxml src/lxml/tests

scoder at codespeak.net scoder at codespeak.net
Sun Jul 8 11:30:20 CEST 2007


Author: scoder
Date: Sun Jul  8 11:30:19 2007
New Revision: 44845

Modified:
   lxml/branch/lxml-1.3/CHANGES.txt
   lxml/branch/lxml-1.3/src/lxml/etree.pyx
   lxml/branch/lxml-1.3/src/lxml/tests/test_etree.py
Log:
trunk merge: support base_url kw arg in HTML() and XML()

Modified: lxml/branch/lxml-1.3/CHANGES.txt
==============================================================================
--- lxml/branch/lxml-1.3/CHANGES.txt	(original)
+++ lxml/branch/lxml-1.3/CHANGES.txt	Sun Jul  8 11:30:19 2007
@@ -2,6 +2,18 @@
 lxml changelog
 ==============
 
+Under development
+=================
+
+Features added
+--------------
+
+* Support ``base_url`` keyword argument in ``HTML()`` and ``XML()``
+
+Bugs fixed
+----------
+
+
 1.3.2 (2007-07-03)
 ==================
 

Modified: lxml/branch/lxml-1.3/src/lxml/etree.pyx
==============================================================================
--- lxml/branch/lxml-1.3/src/lxml/etree.pyx	(original)
+++ lxml/branch/lxml-1.3/src/lxml/etree.pyx	Sun Jul  8 11:30:19 2007
@@ -1871,28 +1871,42 @@
 
     return _elementTreeFactory(doc, element)
 
-def HTML(text, _BaseParser parser=None):
+def HTML(text, _BaseParser parser=None, base_url=None):
     """Parses an HTML document from a string constant. This function can be used
     to embed "HTML literals" in Python code.
+
+    To override the parser with a different ``HTMLParser`` you can pass it to
+    the ``parser`` keyword argument.
+
+    The ``base_url`` keyword argument sets the original base URL of the
+    document so that relative paths can be resolved when looking up external
+    entities (DTD, XInclude, ...).
     """
     cdef _Document doc
     if parser is None:
         parser = __GLOBAL_PARSER_CONTEXT.getDefaultParser()
         if not isinstance(parser, HTMLParser):
             parser = __DEFAULT_HTML_PARSER
-    doc = _parseMemoryDocument(text, None, parser)
+    doc = _parseMemoryDocument(text, base_url, parser)
     return doc.getroot()
 
-def XML(text, _BaseParser parser=None):
+def XML(text, _BaseParser parser=None, base_url=None):
     """Parses an XML document from a string constant. This function can be used
     to embed "XML literals" in Python code.
+
+    To override the parser with a different ``XMLParser`` you can pass it to
+    the ``parser`` keyword argument.
+
+    The ``base_url`` keyword argument sets the original base URL of the
+    document so that relative paths can be resolved when looking up external
+    entities (DTD, XInclude, ...).
     """
     cdef _Document doc
     if parser is None:
         parser = __GLOBAL_PARSER_CONTEXT.getDefaultParser()
         if not isinstance(parser, XMLParser):
             parser = __DEFAULT_XML_PARSER
-    doc = _parseMemoryDocument(text, None, parser)
+    doc = _parseMemoryDocument(text, base_url, parser)
     return doc.getroot()
 
 fromstring = XML

Modified: lxml/branch/lxml-1.3/src/lxml/tests/test_etree.py
==============================================================================
--- lxml/branch/lxml-1.3/src/lxml/tests/test_etree.py	(original)
+++ lxml/branch/lxml-1.3/src/lxml/tests/test_etree.py	Sun Jul  8 11:30:19 2007
@@ -1378,6 +1378,18 @@
         self.assertEquals(None, el.sourceline)
         self.assertEquals(None, child.sourceline)
 
+    def test_XML_base_url_docinfo(self):
+        etree = self.etree
+        root = etree.XML("<root/>", base_url="http://no/such/url")
+        docinfo = root.getroottree().docinfo
+        self.assertEquals(docinfo.URL, "http://no/such/url")
+
+    def test_HTML_base_url_docinfo(self):
+        etree = self.etree
+        root = etree.HTML("<html/>", base_url="http://no/such/url")
+        docinfo = root.getroottree().docinfo
+        self.assertEquals(docinfo.URL, "http://no/such/url")
+
     def test_docinfo_public(self):
         etree = self.etree
         xml_header = '<?xml version="1.0" encoding="ascii"?>'


More information about the lxml-checkins mailing list