[Lxml-checkins] r52369 - in lxml/trunk: . src/lxml src/lxml/tests

scoder at codespeak.net scoder at codespeak.net
Tue Mar 11 10:34:25 CET 2008


Author: scoder
Date: Tue Mar 11 10:34:21 2008
New Revision: 52369

Modified:
   lxml/trunk/   (props changed)
   lxml/trunk/src/lxml/lxml.etree.pyx
   lxml/trunk/src/lxml/tests/test_etree.py
Log:
 r3754 at delle:  sbehnel | 2008-03-11 10:33:26 +0100
 Let the el.base property fall back to the document URL for HTML documents as well


Modified: lxml/trunk/src/lxml/lxml.etree.pyx
==============================================================================
--- lxml/trunk/src/lxml/lxml.etree.pyx	(original)
+++ lxml/trunk/src/lxml/lxml.etree.pyx	Tue Mar 11 10:34:21 2008
@@ -930,7 +930,9 @@
             cdef char* c_base
             c_base = tree.xmlNodeGetBase(self._doc._c_doc, self._c_node)
             if c_base is NULL:
-                return None
+                if self._doc._c_doc.URL is NULL:
+                    return None
+                return self._doc._c_doc.URL
             # FIXME: this might be UTF-8 or any other 8-bit encoding
             base = c_base
             tree.xmlFree(c_base)

Modified: lxml/trunk/src/lxml/tests/test_etree.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_etree.py	(original)
+++ lxml/trunk/src/lxml/tests/test_etree.py	Tue Mar 11 10:34:21 2008
@@ -1715,6 +1715,17 @@
             root.get('{http://www.w3.org/XML/1998/namespace}base'),
             "https://secret/url")
 
+    def test_html_base(self):
+        etree = self.etree
+        root = etree.HTML("<html><body></body></html>",
+                          base_url="http://no/such/url")
+        self.assertEquals(root.base, "http://no/such/url")
+
+    def test_html_base_tag(self):
+        etree = self.etree
+        root = etree.HTML('<html><head><base href="http://no/such/url"></head></html>')
+        self.assertEquals(root.base, "http://no/such/url")
+
     def test_dtd_io(self):
         # check that DTDs that go in also go back out
         xml = '''\


More information about the lxml-checkins mailing list