[Lxml-checkins] r44845 - in lxml/branch/lxml-1.3: . src/lxml src/lxml/tests
scoder at codespeak.net
scoder at codespeak.net
Sun Jul 8 11:30:20 CEST 2007
Author: scoder
Date: Sun Jul 8 11:30:19 2007
New Revision: 44845
Modified:
lxml/branch/lxml-1.3/CHANGES.txt
lxml/branch/lxml-1.3/src/lxml/etree.pyx
lxml/branch/lxml-1.3/src/lxml/tests/test_etree.py
Log:
trunk merge: support base_url kw arg in HTML() and XML()
Modified: lxml/branch/lxml-1.3/CHANGES.txt
==============================================================================
--- lxml/branch/lxml-1.3/CHANGES.txt (original)
+++ lxml/branch/lxml-1.3/CHANGES.txt Sun Jul 8 11:30:19 2007
@@ -2,6 +2,18 @@
lxml changelog
==============
+Under development
+=================
+
+Features added
+--------------
+
+* Support ``base_url`` keyword argument in ``HTML()`` and ``XML()``
+
+Bugs fixed
+----------
+
+
1.3.2 (2007-07-03)
==================
Modified: lxml/branch/lxml-1.3/src/lxml/etree.pyx
==============================================================================
--- lxml/branch/lxml-1.3/src/lxml/etree.pyx (original)
+++ lxml/branch/lxml-1.3/src/lxml/etree.pyx Sun Jul 8 11:30:19 2007
@@ -1871,28 +1871,42 @@
return _elementTreeFactory(doc, element)
-def HTML(text, _BaseParser parser=None):
+def HTML(text, _BaseParser parser=None, base_url=None):
"""Parses an HTML document from a string constant. This function can be used
to embed "HTML literals" in Python code.
+
+ To override the parser with a different ``HTMLParser`` you can pass it to
+ the ``parser`` keyword argument.
+
+ The ``base_url`` keyword argument allows setting the original base URL of
+ the document to support relative paths when looking up external entities
+ (DTD, XInclude, ...).
"""
cdef _Document doc
if parser is None:
parser = __GLOBAL_PARSER_CONTEXT.getDefaultParser()
if not isinstance(parser, HTMLParser):
parser = __DEFAULT_HTML_PARSER
- doc = _parseMemoryDocument(text, None, parser)
+ doc = _parseMemoryDocument(text, base_url, parser)
return doc.getroot()
-def XML(text, _BaseParser parser=None):
+def XML(text, _BaseParser parser=None, base_url=None):
"""Parses an XML document from a string constant. This function can be used
to embed "XML literals" in Python code.
+
+ To override the parser with a different ``XMLParser`` you can pass it to
+ the ``parser`` keyword argument.
+
+ The ``base_url`` keyword argument allows setting the original base URL of
+ the document to support relative paths when looking up external entities
+ (DTD, XInclude, ...).
"""
cdef _Document doc
if parser is None:
parser = __GLOBAL_PARSER_CONTEXT.getDefaultParser()
if not isinstance(parser, XMLParser):
parser = __DEFAULT_XML_PARSER
- doc = _parseMemoryDocument(text, None, parser)
+ doc = _parseMemoryDocument(text, base_url, parser)
return doc.getroot()
fromstring = XML
Modified: lxml/branch/lxml-1.3/src/lxml/tests/test_etree.py
==============================================================================
--- lxml/branch/lxml-1.3/src/lxml/tests/test_etree.py (original)
+++ lxml/branch/lxml-1.3/src/lxml/tests/test_etree.py Sun Jul 8 11:30:19 2007
@@ -1378,6 +1378,18 @@
self.assertEquals(None, el.sourceline)
self.assertEquals(None, child.sourceline)
+ def test_XML_base_url_docinfo(self):
+ etree = self.etree
+ root = etree.XML("<root/>", base_url="http://no/such/url")
+ docinfo = root.getroottree().docinfo
+ self.assertEquals(docinfo.URL, "http://no/such/url")
+
+ def test_HTML_base_url_docinfo(self):
+ etree = self.etree
+ root = etree.HTML("<html/>", base_url="http://no/such/url")
+ docinfo = root.getroottree().docinfo
+ self.assertEquals(docinfo.URL, "http://no/such/url")
+
def test_docinfo_public(self):
etree = self.etree
xml_header = '<?xml version="1.0" encoding="ascii"?>'
More information about the lxml-checkins mailing list