[Lxml-checkins] r47637 - in lxml/trunk: . src/lxml

scoder at codespeak.net
Sat Oct 20 14:46:30 CEST 2007


Author: scoder
Date: Sat Oct 20 14:46:30 2007
New Revision: 47637

Modified:
   lxml/trunk/CHANGES.txt
   lxml/trunk/src/lxml/docloader.pxi
   lxml/trunk/src/lxml/etree.pyx
   lxml/trunk/src/lxml/xslt.pxi
Log:
improved interaction of custom resolvers and XSLT

Modified: lxml/trunk/CHANGES.txt
==============================================================================
--- lxml/trunk/CHANGES.txt	(original)
+++ lxml/trunk/CHANGES.txt	Sat Oct 20 14:46:30 2007
@@ -8,6 +8,9 @@
 Features added
 --------------
 
+* Resolvers can now provide a ``base_url`` keyword argument when
+  resolving a document as string data.
+
 * When using ``lxml.doctestcompare`` you can give the doctest option
   ``NOPARSE_MARKUP`` (like ``# doctest: +NOPARSE_MARKUP``) to suppress
   the special checking for one test.
@@ -15,6 +18,9 @@
 Bugs fixed
 ----------
 
+* Using custom resolvers on XSLT stylesheets parsed from a string
+  could request ill-formed URLs.
+
 * lxml.etree could crash when adding more than 10000 namespaces to a
   document
 

Modified: lxml/trunk/src/lxml/docloader.pxi
==============================================================================
--- lxml/trunk/src/lxml/docloader.pxi	(original)
+++ lxml/trunk/src/lxml/docloader.pxi	Sat Oct 20 14:46:30 2007
@@ -9,43 +9,69 @@
 cdef class _InputDocument:
     cdef _InputDocumentDataType _type
     cdef object _data_bytes
+    cdef object _filename
     cdef object _file
 
 cdef class Resolver:
     "This is the base class of all resolvers."
     def resolve(self, system_url, public_id, context):
+        """Override this method to resolve an external source by
+        ``system_url`` and ``public_id``.  The third argument is an
+        opaque context object.
+
+        Return the result of one of the ``resolve_*()`` methods.
+        """
         return None
 
     def resolve_empty(self, context):
-        "Return an empty input document."
+        """Return an empty input document.
+
+        Pass context as parameter.
+        """
         cdef _InputDocument doc_ref
         doc_ref = _InputDocument()
         doc_ref._type = PARSER_DATA_EMPTY
         return doc_ref
 
-    def resolve_string(self, string, context):
-        "Return a parsable string as input document."
+    def resolve_string(self, string, context, base_url=None):
+        """Return a parsable string as input document.
+
+        Pass data string and context as parameters.
+
+        You can pass the source URL as 'base_url' keyword.
+        """
         cdef _InputDocument doc_ref
         doc_ref = _InputDocument()
         doc_ref._type = PARSER_DATA_STRING
         doc_ref._data_bytes = _utf8(string)
+        if base_url is not None:
+            doc_ref._filename = _encodeFilename(base_url)
         return doc_ref
 
     def resolve_filename(self, filename, context):
-        "Return the name of a parsable file as input document."
+        """Return the name of a parsable file as input document.
+
+        Pass filename and context as parameters.
+        """
         cdef _InputDocument doc_ref
         doc_ref = _InputDocument()
         doc_ref._type = PARSER_DATA_FILENAME
-        doc_ref._data_bytes = _encodeFilename(filename)
+        doc_ref._filename = _encodeFilename(filename)
         return doc_ref
 
     def resolve_file(self, f, context):
-        "Return an open file-like object as input document."
+        """Return an open file-like object as input document.
+
+        Pass open file and context as parameters.
+        """
         cdef _InputDocument doc_ref
-        if not hasattr(f, 'read'):
+        try:
+            f.read
+        except AttributeError:
             raise TypeError, "Argument is not a file-like object"
         doc_ref = _InputDocument()
         doc_ref._type = PARSER_DATA_FILE
+        doc_ref._filename = _getFilenameForFile(f)
         doc_ref._file = f
         return doc_ref
 

Modified: lxml/trunk/src/lxml/etree.pyx
==============================================================================
--- lxml/trunk/src/lxml/etree.pyx	(original)
+++ lxml/trunk/src/lxml/etree.pyx	Sat Oct 20 14:46:30 2007
@@ -16,6 +16,9 @@
 
 del __builtin__
 
+cdef object os_path_join
+from os.path import join as os_path_join
+
 cdef object _elementpath
 import _elementpath
 

Modified: lxml/trunk/src/lxml/xslt.pxi
==============================================================================
--- lxml/trunk/src/lxml/xslt.pxi	(original)
+++ lxml/trunk/src/lxml/xslt.pxi	Sat Oct 20 14:46:30 2007
@@ -84,25 +84,26 @@
     context = <_XSLTResolverContext>c_context
     try:
         resolvers = context._resolvers
-        uri = funicode(c_uri)
+        if cstd.strncmp('string://', c_uri, 9) == 0:
+            uri = funicode(c_uri + 9)
+            if cstd.strncmp('string://', context._c_style_doc.URL, 9) != 0 and \
+                    cstd.strcmp('<string>', context._c_style_doc.URL) != 0:
+                # stylesheet URL known => make the target URL absolute
+                uri = os_path_join(context._c_style_doc.URL, uri)
+        else:
+            uri = funicode(c_uri)
         doc_ref = resolvers.resolve(uri, None, context)
 
         c_doc = NULL
         if doc_ref is not None:
             if doc_ref._type == PARSER_DATA_STRING:
                 c_doc = _parseDoc(
-                    doc_ref._data_bytes, None, context._parser)
+                    doc_ref._data_bytes, doc_ref._filename, context._parser)
             elif doc_ref._type == PARSER_DATA_FILENAME:
-                if python.PyUnicode_Check(doc_ref._data_bytes):
-                    filename = _utf8(doc_ref._data_bytes)
-                else:
-                    filename = doc_ref._data_bytes
-                c_doc = _parseDocFromFile(filename, context._parser)
+                c_doc = _parseDocFromFile(doc_ref._filename, context._parser)
             elif doc_ref._type == PARSER_DATA_FILE:
-                filename = _getFilenameForFile(doc_ref._file)
-                data = doc_ref._file.read()
-                c_doc = _parseDoc(
-                    data, filename, context._parser)
+                c_doc = _parseDocFromFilelike(
+                    doc_ref._file, doc_ref._filename, context._parser)
             elif doc_ref._type == PARSER_DATA_EMPTY:
                 c_doc = _newDoc()
             if c_doc is not NULL and c_doc.URL is NULL:
@@ -115,7 +116,7 @@
 
 cdef void _xslt_store_resolver_exception(char* c_uri, void* context,
                                          xslt.xsltLoadType c_type):
-    message = "Cannot resolve URI %s" % funicode(c_uri)
+    message = "Cannot resolve URI %s" % c_uri
     if c_type == xslt.XSLT_LOAD_DOCUMENT:
         exception = XSLTApplyError(message)
     else:
@@ -299,7 +300,7 @@
 
         # make sure we always have a stylesheet URL
         if c_doc.URL is NULL:
-            doc_url_utf = "XSLT:__STRING__XSLT__%s" % id(self)
+            doc_url_utf = "string://__STRING__XSLT__%s" % id(self)
             c_doc.URL = tree.xmlStrdup(_cstr(doc_url_utf))
 
         self._error_log = _ErrorLog()


More information about the lxml-checkins mailing list