[Lxml-checkins] r32721 - in lxml/branch/lxml-1.1: . doc src/lxml src/lxml/tests
scoder at codespeak.net
Fri Sep 29 09:12:21 CEST 2006
Author: scoder
Date: Fri Sep 29 09:12:18 2006
New Revision: 32721
Modified:
lxml/branch/lxml-1.1/CHANGES.txt
lxml/branch/lxml-1.1/doc/api.txt
lxml/branch/lxml-1.1/doc/build.txt
lxml/branch/lxml-1.1/src/lxml/classlookup.pxi
lxml/branch/lxml-1.1/src/lxml/etree.pyx
lxml/branch/lxml-1.1/src/lxml/parser.pxi
lxml/branch/lxml-1.1/src/lxml/tests/test_xslt.py
lxml/branch/lxml-1.1/src/lxml/tree.pxd
lxml/branch/lxml-1.1/src/lxml/xinclude.pxd
lxml/branch/lxml-1.1/src/lxml/xslt.pxi
Log:
merge from trunk: XSLT-PI, doc updates, XInclude parser options
Modified: lxml/branch/lxml-1.1/CHANGES.txt
==============================================================================
--- lxml/branch/lxml-1.1/CHANGES.txt (original)
+++ lxml/branch/lxml-1.1/CHANGES.txt Fri Sep 29 09:12:18 2006
@@ -2,13 +2,14 @@
lxml changelog
==============
-=======
current
=======
Features added
--------------
+* Simplified support for handling XSLT processing instructions
+
Bugs fixed
----------
Modified: lxml/branch/lxml-1.1/doc/api.txt
==============================================================================
--- lxml/branch/lxml-1.1/doc/api.txt (original)
+++ lxml/branch/lxml-1.1/doc/api.txt Fri Sep 29 09:12:18 2006
@@ -872,8 +872,8 @@
xinclude
--------
-Simple XInclude support exists. You can make xinclude statements in a
-document be processed by calling the xinclude() method on a tree::
+Simple XInclude support exists. You can let lxml process xinclude statements
+in a document by calling the xinclude() method on a tree::
>>> data = StringIO('''\
... <doc xmlns:xi="http://www.w3.org/2001/XInclude">
@@ -890,10 +890,10 @@
write_c14n on ElementTree
-------------------------
-The lxml.etree.ElementTree class has a method write_c14n, which takes
-one argument: a file object. This file object will receive an UTF-8
-representation of the canonicalized form of the XML, following the W3C
-C14N recommendation. For example::
+The lxml.etree.ElementTree class has a method write_c14n, which takes a file
+object as argument. This file object will receive an UTF-8 representation of
+the canonicalized form of the XML, following the W3C C14N recommendation. For
+example::
>>> f = StringIO('<a><b/></a>')
>>> tree = etree.parse(f)
Modified: lxml/branch/lxml-1.1/doc/build.txt
==============================================================================
--- lxml/branch/lxml-1.1/doc/build.txt (original)
+++ lxml/branch/lxml-1.1/doc/build.txt Fri Sep 29 09:12:18 2006
@@ -103,7 +103,7 @@
building above), as it searches the ``src`` directory. You can use the
following one-step command to trigger an in-place build and test it::
- make clean test
+ make test
To run the ElementTree and cElementTree compatibility tests, make sure
you have lxml on your PYTHONPATH first, then run::
Modified: lxml/branch/lxml-1.1/src/lxml/classlookup.pxi
==============================================================================
--- lxml/branch/lxml-1.1/src/lxml/classlookup.pxi (original)
+++ lxml/branch/lxml-1.1/src/lxml/classlookup.pxi Fri Sep 29 09:12:18 2006
@@ -120,6 +120,12 @@
return (<ElementDefaultClassLookup>state).comment_class
elif c_node.type == tree.XML_PI_NODE:
if state is None:
+ # special case XSLT-PI
+ if c_node.name is not NULL and c_node.content is not NULL:
+ if cstd.strcmp(c_node.name, "xml-stylesheet") == 0:
+ if cstd.strstr(c_node.content, "text/xsl") is not NULL or \
+ cstd.strstr(c_node.content, "text/xml") is not NULL:
+ return _XSLTProcessingInstruction
return _ProcessingInstruction
else:
return (<ElementDefaultClassLookup>state).pi_class
Modified: lxml/branch/lxml-1.1/src/lxml/etree.pyx
==============================================================================
--- lxml/branch/lxml-1.1/src/lxml/etree.pyx (original)
+++ lxml/branch/lxml-1.1/src/lxml/etree.pyx Fri Sep 29 09:12:18 2006
@@ -603,7 +603,13 @@
# typed as elements. The included fragment is added between the two,
# i.e. as a sibling, which does not conflict with traversal.
self._assertHasRoot()
- result = xinclude.xmlXIncludeProcessTree(self._context_node._c_node)
+ if self._context_node._doc._parser != None:
+ result = xinclude.xmlXIncludeProcessTreeFlags(
+ self._context_node._c_node,
+ self._context_node._doc._parser._parse_options)
+ else:
+ result = xinclude.xmlXIncludeProcessTree(
+ self._context_node._c_node)
if result == -1:
raise XIncludeError, "XInclude processing failed"
Modified: lxml/branch/lxml-1.1/src/lxml/parser.pxi
==============================================================================
--- lxml/branch/lxml-1.1/src/lxml/parser.pxi (original)
+++ lxml/branch/lxml-1.1/src/lxml/parser.pxi Fri Sep 29 09:12:18 2006
@@ -865,7 +865,6 @@
############################################################
cdef _Document _parseDocument(source, _BaseParser parser):
- cdef xmlDoc* c_doc
filename = _getFilenameForFile(source)
if hasattr(source, 'getvalue') and hasattr(source, 'tell'):
# StringIO - reading from start?
@@ -882,7 +881,11 @@
if filename is None:
filename = _encodeFilename(source)
# open filename
- c_doc = _parseDocFromFile(filename, parser)
+ return _parseDocumentFromURL(filename, parser)
+
+cdef _Document _parseDocumentFromURL(url, _BaseParser parser):
+ cdef xmlDoc* c_doc
+ c_doc = _parseDocFromFile(url, parser)
return _documentFactory(c_doc, parser)
cdef _Document _parseMemoryDocument(text, url, _BaseParser parser):
Modified: lxml/branch/lxml-1.1/src/lxml/tests/test_xslt.py
==============================================================================
--- lxml/branch/lxml-1.1/src/lxml/tests/test_xslt.py (original)
+++ lxml/branch/lxml-1.1/src/lxml/tests/test_xslt.py Fri Sep 29 09:12:18 2006
@@ -573,6 +573,82 @@
result = xslt(root[0])
root[:] = result.getroot()[:]
del root # segfaulted before
+
+ def test_xslt_pi(self):
+ tree = self.parse('''\
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="%s"?>
+<a>
+ <b>B</b>
+ <c>C</c>
+</a>''' % fileInTestDir("test1.xslt"))
+
+ style_root = tree.getroot().getprevious().parseXSL().getroot()
+ self.assertEquals("{http://www.w3.org/1999/XSL/Transform}stylesheet",
+ style_root.tag)
+
+ def test_xslt_pi_embedded_xmlid(self):
+ # test xml:id dictionary lookup mechanism
+ tree = self.parse('''\
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="#style"?>
+<a>
+ <b>B</b>
+ <c>C</c>
+ <xsl:stylesheet version="1.0" xml:id="style"
+ xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
+ <xsl:template match="*" />
+ <xsl:template match="/">
+ <foo><xsl:value-of select="/a/b/text()" /></foo>
+ </xsl:template>
+ </xsl:stylesheet>
+</a>''')
+
+ style_root = tree.getroot().getprevious().parseXSL().getroot()
+ self.assertEquals("{http://www.w3.org/1999/XSL/Transform}stylesheet",
+ style_root.tag)
+
+ st = etree.XSLT(style_root)
+ res = st.apply(tree)
+ self.assertEquals('''\
+<?xml version="1.0"?>
+<foo>B</foo>
+''',
+ st.tostring(res))
+
+ def test_xslt_pi_embedded_id(self):
+ # test XPath lookup mechanism
+ tree = self.parse('''\
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="#style"?>
+<a>
+ <b>B</b>
+ <c>C</c>
+</a>''')
+
+ style = self.parse('''\
+<xsl:stylesheet version="1.0" xml:id="style"
+ xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
+ <xsl:template match="*" />
+ <xsl:template match="/">
+ <foo><xsl:value-of select="/a/b/text()" /></foo>
+ </xsl:template>
+</xsl:stylesheet>
+''')
+
+ tree.getroot().append(style.getroot())
+
+ style_root = tree.getroot().getprevious().parseXSL().getroot()
+ self.assertEquals("{http://www.w3.org/1999/XSL/Transform}stylesheet",
+ style_root.tag)
+
+ st = etree.XSLT(style_root)
+ res = st.apply(tree)
+ self.assertEquals('''\
+<?xml version="1.0"?>
+<foo>B</foo>
+''',
+ st.tostring(res))
def test_exslt_regexp_test(self):
xslt = etree.XSLT(etree.XML("""\
Modified: lxml/branch/lxml-1.1/src/lxml/tree.pxd
==============================================================================
--- lxml/branch/lxml-1.1/src/lxml/tree.pxd (original)
+++ lxml/branch/lxml-1.1/src/lxml/tree.pxd Fri Sep 29 09:12:18 2006
@@ -210,7 +210,12 @@
cdef char* xmlBufferContent(xmlBuffer* buf)
cdef int xmlBufferLength(xmlBuffer* buf)
cdef int xmlKeepBlanksDefault(int val)
-
+ cdef char* xmlNodeGetBase(xmlDoc* doc, xmlNode* node)
+ cdef char* xmlBuildURI(char* href, char* base)
+
+cdef extern from "libxml/valid.h":
+ cdef xmlAttr* xmlGetID(xmlDoc* doc, char* ID)
+
cdef extern from "libxml/xmlIO.h":
cdef int xmlOutputBufferWriteString(xmlOutputBuffer* out, char* str)
cdef int xmlOutputBufferFlush(xmlOutputBuffer* out)
Modified: lxml/branch/lxml-1.1/src/lxml/xinclude.pxd
==============================================================================
--- lxml/branch/lxml-1.1/src/lxml/xinclude.pxd (original)
+++ lxml/branch/lxml-1.1/src/lxml/xinclude.pxd Fri Sep 29 09:12:18 2006
@@ -3,5 +3,7 @@
cdef extern from "libxml/xinclude.h":
cdef int xmlXIncludeProcess(xmlDoc* doc)
+ cdef int xmlXIncludeProcessFlags(xmlDoc* doc, int parser_opts)
cdef int xmlXIncludeProcessTree(xmlNode* doc)
+ cdef int xmlXIncludeProcessTreeFlags(xmlNode* doc, int parser_opts)
Modified: lxml/branch/lxml-1.1/src/lxml/xslt.pxi
==============================================================================
--- lxml/branch/lxml-1.1/src/lxml/xslt.pxi (original)
+++ lxml/branch/lxml-1.1/src/lxml/xslt.pxi Fri Sep 29 09:12:18 2006
@@ -545,10 +545,68 @@
################################################################################
-# EXSLT regexp implementation
+# XSLT PI support
+
+cdef object _FIND_PI_HREF
+_FIND_PI_HREF = re.compile('href\s*=\s*["\']([^"\']+)["\']').findall
+
+cdef XPath _findStylesheetByID
+_findStylesheetByID = XPath(
+ "//xsl:stylesheet[@xml:id = $id]",
+ {"xsl":"http://www.w3.org/1999/XSL/Transform"})
+
+cdef class _XSLTProcessingInstruction(PIBase):
+ def parseXSL(self, parser=None):
+ """Try to parse the stylesheet referenced by this PI and return an
+ ElementTree for it. If the stylesheet is embedded in the same
+ document (referenced via xml:id), find and return an ElementTree for
+ the stylesheet Element.
+
+ The optional ``parser`` keyword argument can be passed to specify the
+ parser used to read from external stylesheet URLs.
+ """
+ cdef _Document result_doc
+ cdef _Element result_node
+ cdef char* c_href
+ cdef xmlAttr* c_attr
+ if self._c_node.content is NULL:
+ raise ValueError, "PI lacks content"
+ hrefs_utf = _FIND_PI_HREF(self._c_node.content)
+ if len(hrefs_utf) != 1:
+ raise ValueError, "malformed PI attributes"
+ href_utf = hrefs_utf[0]
+ c_href = _cstr(href_utf)
+
+ if c_href[0] != c'#':
+ # normal URL, try to parse from it
+ c_href = tree.xmlBuildURI(
+ c_href,
+ tree.xmlNodeGetBase(self._c_node.doc, self._c_node))
+ if c_href is NULL:
+ c_href = _cstr(href_utf)
+ result_doc = _parseDocument(funicode(c_href), parser)
+ return _elementTreeFactory(result_doc, None)
+
+ # ID reference to embedded stylesheet
+ # try XML:ID lookup
+ c_href = c_href+1 # skip leading '#'
+ c_attr = tree.xmlGetID(self._c_node.doc, c_href)
+ if c_attr is not NULL and c_attr.doc is self._c_node.doc:
+ result_node = _elementFactory(self._doc, c_attr.parent)
+ return _elementTreeFactory(result_node._doc, result_node)
+
+ # try XPath search
+ root = _findStylesheetByID(self._doc, id=funicode(c_href))
+ if not root:
+ raise ValueError, "reference to non-existing embedded stylesheet"
+ elif len(root) > 1:
+ raise ValueError, "ambiguous reference to embedded stylesheet"
+ result_node = root[0]
+ return _elementTreeFactory(result_node._doc, result_node)
-cdef object RE_COMPILE
-RE_COMPILE = re.compile
+
+################################################################################
+# EXSLT regexp implementation
cdef class _ExsltRegExp:
cdef object _compile_map
@@ -571,7 +629,7 @@
py_flags = re.UNICODE
if ignore_case:
py_flags = py_flags | re.IGNORECASE
- rexp_compiled = RE_COMPILE(rexp, py_flags)
+ rexp_compiled = re.compile(rexp, py_flags)
python.PyDict_SetItem(self._compile_map, key, rexp_compiled)
return rexp_compiled
More information about the lxml-checkins mailing list