[Lxml-checkins] r32721 - in lxml/branch/lxml-1.1: . doc src/lxml src/lxml/tests

scoder at codespeak.net scoder at codespeak.net
Fri Sep 29 09:12:21 CEST 2006


Author: scoder
Date: Fri Sep 29 09:12:18 2006
New Revision: 32721

Modified:
   lxml/branch/lxml-1.1/CHANGES.txt
   lxml/branch/lxml-1.1/doc/api.txt
   lxml/branch/lxml-1.1/doc/build.txt
   lxml/branch/lxml-1.1/src/lxml/classlookup.pxi
   lxml/branch/lxml-1.1/src/lxml/etree.pyx
   lxml/branch/lxml-1.1/src/lxml/parser.pxi
   lxml/branch/lxml-1.1/src/lxml/tests/test_xslt.py
   lxml/branch/lxml-1.1/src/lxml/tree.pxd
   lxml/branch/lxml-1.1/src/lxml/xinclude.pxd
   lxml/branch/lxml-1.1/src/lxml/xslt.pxi
Log:
merge from trunk: XSLT-PI, doc updates, XInclude parser options

Modified: lxml/branch/lxml-1.1/CHANGES.txt
==============================================================================
--- lxml/branch/lxml-1.1/CHANGES.txt	(original)
+++ lxml/branch/lxml-1.1/CHANGES.txt	Fri Sep 29 09:12:18 2006
@@ -2,13 +2,14 @@
 lxml changelog
 ==============
 
-=======
 current
 =======
 
 Features added
 --------------
 
+* Simplified support for handling XSLT processing instructions
+
 Bugs fixed
 ----------
 

Modified: lxml/branch/lxml-1.1/doc/api.txt
==============================================================================
--- lxml/branch/lxml-1.1/doc/api.txt	(original)
+++ lxml/branch/lxml-1.1/doc/api.txt	Fri Sep 29 09:12:18 2006
@@ -872,8 +872,8 @@
 xinclude
 --------
 
-Simple XInclude support exists. You can make xinclude statements in a
-document be processed by calling the xinclude() method on a tree::
+Simple XInclude support exists.  You can let lxml process xinclude statements
+in a document by calling the xinclude() method on a tree::
 
   >>> data = StringIO('''\
   ... <doc xmlns:xi="http://www.w3.org/2001/XInclude">
@@ -890,10 +890,10 @@
 write_c14n on ElementTree
 -------------------------
 
-The lxml.etree.ElementTree class has a method write_c14n, which takes
-one argument: a file object. This file object will receive an UTF-8
-representation of the canonicalized form of the XML, following the W3C
-C14N recommendation. For example::
+The lxml.etree.ElementTree class has a method write_c14n, which takes a file
+object as argument.  This file object will receive a UTF-8 representation of
+the canonicalized form of the XML, following the W3C C14N recommendation.  For
+example::
 
   >>> f = StringIO('<a><b/></a>')
   >>> tree = etree.parse(f)

Modified: lxml/branch/lxml-1.1/doc/build.txt
==============================================================================
--- lxml/branch/lxml-1.1/doc/build.txt	(original)
+++ lxml/branch/lxml-1.1/doc/build.txt	Fri Sep 29 09:12:18 2006
@@ -103,7 +103,7 @@
 building above), as it searches the ``src`` directory.  You can use the
 following one-step command to trigger an in-place build and test it::
 
-  make clean test
+  make test
 
 To run the ElementTree and cElementTree compatibility tests, make sure
 you have lxml on your PYTHONPATH first, then run::

Modified: lxml/branch/lxml-1.1/src/lxml/classlookup.pxi
==============================================================================
--- lxml/branch/lxml-1.1/src/lxml/classlookup.pxi	(original)
+++ lxml/branch/lxml-1.1/src/lxml/classlookup.pxi	Fri Sep 29 09:12:18 2006
@@ -120,6 +120,12 @@
             return (<ElementDefaultClassLookup>state).comment_class
     elif c_node.type == tree.XML_PI_NODE:
         if state is None:
+            # special case XSLT-PI
+            if c_node.name is not NULL and c_node.content is not NULL:
+                if cstd.strcmp(c_node.name, "xml-stylesheet") == 0:
+                    if cstd.strstr(c_node.content, "text/xsl") is not NULL or \
+                           cstd.strstr(c_node.content, "text/xml") is not NULL:
+                        return _XSLTProcessingInstruction
             return _ProcessingInstruction
         else:
             return (<ElementDefaultClassLookup>state).pi_class

Modified: lxml/branch/lxml-1.1/src/lxml/etree.pyx
==============================================================================
--- lxml/branch/lxml-1.1/src/lxml/etree.pyx	(original)
+++ lxml/branch/lxml-1.1/src/lxml/etree.pyx	Fri Sep 29 09:12:18 2006
@@ -603,7 +603,13 @@
         # typed as elements.  The included fragment is added between the two,
         # i.e. as a sibling, which does not conflict with traversal.
         self._assertHasRoot()
-        result = xinclude.xmlXIncludeProcessTree(self._context_node._c_node)
+        if self._context_node._doc._parser != None:
+            result = xinclude.xmlXIncludeProcessTreeFlags(
+                self._context_node._c_node,
+                self._context_node._doc._parser._parse_options)
+        else:
+            result = xinclude.xmlXIncludeProcessTree(
+                self._context_node._c_node)
         if result == -1:
             raise XIncludeError, "XInclude processing failed"
 

Modified: lxml/branch/lxml-1.1/src/lxml/parser.pxi
==============================================================================
--- lxml/branch/lxml-1.1/src/lxml/parser.pxi	(original)
+++ lxml/branch/lxml-1.1/src/lxml/parser.pxi	Fri Sep 29 09:12:18 2006
@@ -865,7 +865,6 @@
 ############################################################
 
 cdef _Document _parseDocument(source, _BaseParser parser):
-    cdef xmlDoc* c_doc
     filename = _getFilenameForFile(source)
     if hasattr(source, 'getvalue') and hasattr(source, 'tell'):
         # StringIO - reading from start?
@@ -882,7 +881,11 @@
     if filename is None:
         filename = _encodeFilename(source)
     # open filename
-    c_doc = _parseDocFromFile(filename, parser)
+    return _parseDocumentFromURL(filename, parser)
+
+cdef _Document _parseDocumentFromURL(url, _BaseParser parser):
+    cdef xmlDoc* c_doc  # C-level document pointer filled by the parse call below
+    c_doc = _parseDocFromFile(url, parser)  # url is passed unchanged to the file parser
     return _documentFactory(c_doc, parser)
 
 cdef _Document _parseMemoryDocument(text, url, _BaseParser parser):

Modified: lxml/branch/lxml-1.1/src/lxml/tests/test_xslt.py
==============================================================================
--- lxml/branch/lxml-1.1/src/lxml/tests/test_xslt.py	(original)
+++ lxml/branch/lxml-1.1/src/lxml/tests/test_xslt.py	Fri Sep 29 09:12:18 2006
@@ -573,6 +573,82 @@
         result = xslt(root[0])
         root[:] = result.getroot()[:]
         del root # segfaulted before
+        
+    def test_xslt_pi(self):
+        tree = self.parse('''\
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="%s"?>
+<a>
+  <b>B</b>
+  <c>C</c>
+</a>''' % fileInTestDir("test1.xslt"))  # href is filled in from fileInTestDir("test1.xslt")
+
+        style_root = tree.getroot().getprevious().parseXSL().getroot()  # the PI is the sibling preceding <a>
+        self.assertEquals("{http://www.w3.org/1999/XSL/Transform}stylesheet",
+                          style_root.tag)
+        
+    def test_xslt_pi_embedded_xmlid(self):
+        # test xml:id dictionary lookup mechanism: the stylesheet is embedded in the parsed document itself
+        tree = self.parse('''\
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="#style"?>
+<a>
+  <b>B</b>
+  <c>C</c>
+  <xsl:stylesheet version="1.0" xml:id="style"
+      xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
+    <xsl:template match="*" />
+    <xsl:template match="/">
+      <foo><xsl:value-of select="/a/b/text()" /></foo>
+    </xsl:template>
+  </xsl:stylesheet>
+</a>''')
+
+        style_root = tree.getroot().getprevious().parseXSL().getroot()  # the PI is the sibling preceding <a>
+        self.assertEquals("{http://www.w3.org/1999/XSL/Transform}stylesheet",
+                          style_root.tag)
+
+        st = etree.XSLT(style_root)
+        res = st.apply(tree)  # run the embedded stylesheet against the document that contains it
+        self.assertEquals('''\
+<?xml version="1.0"?>
+<foo>B</foo>
+''',
+                          st.tostring(res))
+        
+    def test_xslt_pi_embedded_id(self):
+        # test XPath lookup mechanism: the stylesheet is parsed separately and appended to the tree below
+        tree = self.parse('''\
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="#style"?>
+<a>
+  <b>B</b>
+  <c>C</c>
+</a>''')
+
+        style = self.parse('''\
+<xsl:stylesheet version="1.0" xml:id="style"
+    xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
+  <xsl:template match="*" />
+  <xsl:template match="/">
+    <foo><xsl:value-of select="/a/b/text()" /></foo>
+  </xsl:template>
+</xsl:stylesheet>
+''')
+
+        tree.getroot().append(style.getroot())  # added after parsing; presumably missing from the tree's xml:id table (TODO confirm)
+
+        style_root = tree.getroot().getprevious().parseXSL().getroot()  # the PI is the sibling preceding <a>
+        self.assertEquals("{http://www.w3.org/1999/XSL/Transform}stylesheet",
+                          style_root.tag)
+
+        st = etree.XSLT(style_root)
+        res = st.apply(tree)  # run the appended stylesheet against the document that now contains it
+        self.assertEquals('''\
+<?xml version="1.0"?>
+<foo>B</foo>
+''',
+                          st.tostring(res))
 
     def test_exslt_regexp_test(self):
         xslt = etree.XSLT(etree.XML("""\

Modified: lxml/branch/lxml-1.1/src/lxml/tree.pxd
==============================================================================
--- lxml/branch/lxml-1.1/src/lxml/tree.pxd	(original)
+++ lxml/branch/lxml-1.1/src/lxml/tree.pxd	Fri Sep 29 09:12:18 2006
@@ -210,7 +210,12 @@
     cdef char* xmlBufferContent(xmlBuffer* buf)
     cdef int xmlBufferLength(xmlBuffer* buf)
     cdef int xmlKeepBlanksDefault(int val)
-    
+    cdef char* xmlNodeGetBase(xmlDoc* doc, xmlNode* node)
+    cdef char* xmlBuildURI(char* href, char* base)
+
+cdef extern from "libxml/valid.h":
+    cdef xmlAttr* xmlGetID(xmlDoc* doc, char* ID)
+
 cdef extern from "libxml/xmlIO.h":
     cdef int xmlOutputBufferWriteString(xmlOutputBuffer* out, char* str)
     cdef int xmlOutputBufferFlush(xmlOutputBuffer* out)

Modified: lxml/branch/lxml-1.1/src/lxml/xinclude.pxd
==============================================================================
--- lxml/branch/lxml-1.1/src/lxml/xinclude.pxd	(original)
+++ lxml/branch/lxml-1.1/src/lxml/xinclude.pxd	Fri Sep 29 09:12:18 2006
@@ -3,5 +3,7 @@
 cdef extern from "libxml/xinclude.h":
     
     cdef int xmlXIncludeProcess(xmlDoc* doc)
+    cdef int xmlXIncludeProcessFlags(xmlDoc* doc, int parser_opts)
     cdef int xmlXIncludeProcessTree(xmlNode* doc)
+    cdef int xmlXIncludeProcessTreeFlags(xmlNode* doc, int parser_opts)
     

Modified: lxml/branch/lxml-1.1/src/lxml/xslt.pxi
==============================================================================
--- lxml/branch/lxml-1.1/src/lxml/xslt.pxi	(original)
+++ lxml/branch/lxml-1.1/src/lxml/xslt.pxi	Fri Sep 29 09:12:18 2006
@@ -545,10 +545,68 @@
 
 
 ################################################################################
-# EXSLT regexp implementation
+# XSLT PI support
+
+cdef object _FIND_PI_HREF  # bound findall(): returns every quoted href value found in a PI's content
+_FIND_PI_HREF = re.compile(r'href\s*=\s*["\']([^"\']+)["\']').findall
+
+cdef XPath _findStylesheetByID  # precompiled query: xsl:stylesheet elements whose xml:id equals $id
+_findStylesheetByID = XPath(
+    "//xsl:stylesheet[@xml:id = $id]",
+    {"xsl":"http://www.w3.org/1999/XSL/Transform"})  # prefix -> namespace URI mapping used by the expression
+
+cdef class _XSLTProcessingInstruction(PIBase):
+    def parseXSL(self, parser=None):
+        """Parse the stylesheet referenced by this PI and return an ElementTree
+        for it.  An href starting with '#' refers to a stylesheet embedded in
+        the same document; the returned tree is then rooted at that Element.
+        Raises ValueError if the PI content, href or stylesheet is not unique.
+
+        The optional ``parser`` keyword argument can be passed to specify the
+        parser used to read from external stylesheet URLs.
+        """
+        cdef _Document result_doc
+        cdef _Element  result_node
+        cdef char* c_href
+        cdef xmlAttr* c_attr
+        if self._c_node.content is NULL:
+            raise ValueError, "PI lacks content"
+        hrefs_utf = _FIND_PI_HREF(self._c_node.content)  # all quoted href values in the PI content
+        if len(hrefs_utf) != 1:  # no href and several hrefs are both rejected
+            raise ValueError, "malformed PI attributes"
+        href_utf = hrefs_utf[0]
+        c_href = _cstr(href_utf)
+
+        if c_href[0] != c'#':
+            # normal URL, try to parse from it
+            c_href = tree.xmlBuildURI(  # NOTE(review): libxml2 documents this result as caller-owned; it is not freed here
+                c_href,
+                tree.xmlNodeGetBase(self._c_node.doc, self._c_node))  # NOTE(review): same for the base URI - confirm leak
+            if c_href is NULL:  # URI could not be built: fall back to the href as written
+                c_href = _cstr(href_utf)
+            result_doc = _parseDocument(funicode(c_href), parser)
+            return _elementTreeFactory(result_doc, None)
+
+        # ID reference to embedded stylesheet
+        # try XML:ID lookup
+        c_href = c_href+1 # skip leading '#'
+        c_attr = tree.xmlGetID(self._c_node.doc, c_href)
+        if c_attr is not NULL and c_attr.doc is self._c_node.doc:  # only accept an attribute owned by this document
+            result_node = _elementFactory(self._doc, c_attr.parent)  # the element carrying the ID attribute
+            return _elementTreeFactory(result_node._doc, result_node)
+
+        # ID lookup found nothing usable: fall back to the XPath search
+        root = _findStylesheetByID(self._doc, id=funicode(c_href))
+        if not root:
+            raise ValueError, "reference to non-existing embedded stylesheet"
+        elif len(root) > 1:
+            raise ValueError, "ambiguous reference to embedded stylesheet"
+        result_node = root[0]
+        return _elementTreeFactory(result_node._doc, result_node)
 
-cdef object RE_COMPILE
-RE_COMPILE = re.compile
+
+################################################################################
+# EXSLT regexp implementation
 
 cdef class _ExsltRegExp:
     cdef object _compile_map
@@ -571,7 +629,7 @@
         py_flags = re.UNICODE
         if ignore_case:
             py_flags = py_flags | re.IGNORECASE
-        rexp_compiled = RE_COMPILE(rexp, py_flags)
+        rexp_compiled = re.compile(rexp, py_flags)
         python.PyDict_SetItem(self._compile_map, key, rexp_compiled)
         return rexp_compiled
 


More information about the lxml-checkins mailing list