[Lxml-checkins] r47637 - in lxml/trunk: . src/lxml
scoder at codespeak.net
scoder at codespeak.net
Sat Oct 20 14:46:30 CEST 2007
Author: scoder
Date: Sat Oct 20 14:46:30 2007
New Revision: 47637
Modified:
lxml/trunk/CHANGES.txt
lxml/trunk/src/lxml/docloader.pxi
lxml/trunk/src/lxml/etree.pyx
lxml/trunk/src/lxml/xslt.pxi
Log:
improved interaction of custom resolvers and XSLT
Modified: lxml/trunk/CHANGES.txt
==============================================================================
--- lxml/trunk/CHANGES.txt (original)
+++ lxml/trunk/CHANGES.txt Sat Oct 20 14:46:30 2007
@@ -8,6 +8,9 @@
Features added
--------------
+* Resolvers can now provide a ``base_url`` keyword argument when
+ resolving a document as string data.
+
* When using ``lxml.doctestcompare`` you can give the doctest option
``NOPARSE_MARKUP`` (like ``# doctest: +NOPARSE_MARKUP``) to suppress
the special checking for one test.
@@ -15,6 +18,9 @@
Bugs fixed
----------
+* Using custom resolvers on XSLT stylesheets parsed from a string
+ could request ill-formed URLs.
+
* lxml.etree could crash when adding more than 10000 namespaces to a
document
Modified: lxml/trunk/src/lxml/docloader.pxi
==============================================================================
--- lxml/trunk/src/lxml/docloader.pxi (original)
+++ lxml/trunk/src/lxml/docloader.pxi Sat Oct 20 14:46:30 2007
@@ -9,43 +9,69 @@
cdef class _InputDocument:
cdef _InputDocumentDataType _type
cdef object _data_bytes
+ cdef object _filename
cdef object _file
cdef class Resolver:
"This is the base class of all resolvers."
def resolve(self, system_url, public_id, context):
+ """Override this method to resolve an external source by
+ ``system_url`` and ``public_id``. The third argument is an
+ opaque context object.
+
+ Return the result of one of the ``resolve_*()`` methods.
+ """
return None
def resolve_empty(self, context):
- "Return an empty input document."
+ """Return an empty input document.
+
+ Pass context as parameter.
+ """
cdef _InputDocument doc_ref
doc_ref = _InputDocument()
doc_ref._type = PARSER_DATA_EMPTY
return doc_ref
- def resolve_string(self, string, context):
- "Return a parsable string as input document."
+ def resolve_string(self, string, context, base_url=None):
+ """Return a parsable string as input document.
+
+ Pass data string and context as parameters.
+
+ You can pass the source URL as 'base_url' keyword.
+ """
cdef _InputDocument doc_ref
doc_ref = _InputDocument()
doc_ref._type = PARSER_DATA_STRING
doc_ref._data_bytes = _utf8(string)
+ if base_url is not None:
+ doc_ref._filename = _encodeFilename(base_url)
return doc_ref
def resolve_filename(self, filename, context):
- "Return the name of a parsable file as input document."
+ """Return the name of a parsable file as input document.
+
+ Pass filename and context as parameters.
+ """
cdef _InputDocument doc_ref
doc_ref = _InputDocument()
doc_ref._type = PARSER_DATA_FILENAME
- doc_ref._data_bytes = _encodeFilename(filename)
+ doc_ref._filename = _encodeFilename(filename)
return doc_ref
def resolve_file(self, f, context):
- "Return an open file-like object as input document."
+ """Return an open file-like object as input document.
+
+ Pass open file and context as parameters.
+ """
cdef _InputDocument doc_ref
- if not hasattr(f, 'read'):
+ try:
+ f.read
+ except AttributeError:
raise TypeError, "Argument is not a file-like object"
doc_ref = _InputDocument()
doc_ref._type = PARSER_DATA_FILE
+ doc_ref._filename = _getFilenameForFile(f)
doc_ref._file = f
return doc_ref
Modified: lxml/trunk/src/lxml/etree.pyx
==============================================================================
--- lxml/trunk/src/lxml/etree.pyx (original)
+++ lxml/trunk/src/lxml/etree.pyx Sat Oct 20 14:46:30 2007
@@ -16,6 +16,9 @@
del __builtin__
+cdef object os_path_join
+from os.path import join as os_path_join
+
cdef object _elementpath
import _elementpath
Modified: lxml/trunk/src/lxml/xslt.pxi
==============================================================================
--- lxml/trunk/src/lxml/xslt.pxi (original)
+++ lxml/trunk/src/lxml/xslt.pxi Sat Oct 20 14:46:30 2007
@@ -84,25 +84,26 @@
context = <_XSLTResolverContext>c_context
try:
resolvers = context._resolvers
- uri = funicode(c_uri)
+ if cstd.strncmp('string://', c_uri, 9) == 0:
+ uri = funicode(c_uri + 9)
+ if cstd.strncmp('string://', context._c_style_doc.URL, 9) != 0 and \
+ cstd.strcmp('<string>', context._c_style_doc.URL) != 0:
+ # stylesheet URL known => make the target URL absolute
+ uri = os_path_join(context._c_style_doc.URL, uri)
+ else:
+ uri = funicode(c_uri)
doc_ref = resolvers.resolve(uri, None, context)
c_doc = NULL
if doc_ref is not None:
if doc_ref._type == PARSER_DATA_STRING:
c_doc = _parseDoc(
- doc_ref._data_bytes, None, context._parser)
+ doc_ref._data_bytes, doc_ref._filename, context._parser)
elif doc_ref._type == PARSER_DATA_FILENAME:
- if python.PyUnicode_Check(doc_ref._data_bytes):
- filename = _utf8(doc_ref._data_bytes)
- else:
- filename = doc_ref._data_bytes
- c_doc = _parseDocFromFile(filename, context._parser)
+ c_doc = _parseDocFromFile(doc_ref._filename, context._parser)
elif doc_ref._type == PARSER_DATA_FILE:
- filename = _getFilenameForFile(doc_ref._file)
- data = doc_ref._file.read()
- c_doc = _parseDoc(
- data, filename, context._parser)
+ c_doc = _parseDocFromFilelike(
+ doc_ref._file, doc_ref._filename, context._parser)
elif doc_ref._type == PARSER_DATA_EMPTY:
c_doc = _newDoc()
if c_doc is not NULL and c_doc.URL is NULL:
@@ -115,7 +116,7 @@
cdef void _xslt_store_resolver_exception(char* c_uri, void* context,
xslt.xsltLoadType c_type):
- message = "Cannot resolve URI %s" % funicode(c_uri)
+ message = "Cannot resolve URI %s" % c_uri
if c_type == xslt.XSLT_LOAD_DOCUMENT:
exception = XSLTApplyError(message)
else:
@@ -299,7 +300,7 @@
# make sure we always have a stylesheet URL
if c_doc.URL is NULL:
- doc_url_utf = "XSLT:__STRING__XSLT__%s" % id(self)
+ doc_url_utf = "string://__STRING__XSLT__%s" % id(self)
c_doc.URL = tree.xmlStrdup(_cstr(doc_url_utf))
self._error_log = _ErrorLog()
More information about the lxml-checkins
mailing list