From scoder at codespeak.net Fri Nov 6 09:20:55 2009 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 6 Nov 2009 09:20:55 +0100 (CET) Subject: [Lxml-checkins] r69020 - in lxml/trunk: . src/lxml Message-ID: <20091106082055.61D81168435@codespeak.net> Author: scoder Date: Fri Nov 6 09:20:54 2009 New Revision: 69020 Modified: lxml/trunk/ (props changed) lxml/trunk/src/lxml/lxml.etree.pyx Log: r5308 at delle: sbehnel | 2009-11-02 16:25:20 +0100 minor cleanup Modified: lxml/trunk/src/lxml/lxml.etree.pyx ============================================================================== --- lxml/trunk/src/lxml/lxml.etree.pyx (original) +++ lxml/trunk/src/lxml/lxml.etree.pyx Fri Nov 6 09:20:54 2009 @@ -265,12 +265,7 @@ # if there are no more references to the document, it is safe # to clean the whole thing up, as all nodes have a reference to # the document - #print "freeing document:", self._c_doc - #displayNode(self._c_doc, 0) - #print self._c_doc, self._c_doc.dict is __GLOBAL_PARSER_CONTEXT._c_dict - #print self._c_doc, canDeallocateChildNodes(self._c_doc) tree.xmlFreeDoc(self._c_doc) - #_deallocDocument(self._c_doc) cdef getroot(self): # return an element proxy for the document root @@ -397,7 +392,7 @@ cdef __initPrefixCache(): cdef int i return tuple([ python.PyString_FromFormat("ns%d", i) - for i from 0 <= i < 30 ]) + for i in range(30) ]) cdef object _PREFIX_CACHE _PREFIX_CACHE = __initPrefixCache() From scoder at codespeak.net Fri Nov 6 09:21:31 2009 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 6 Nov 2009 09:21:31 +0100 (CET) Subject: [Lxml-checkins] r69021 - lxml/trunk Message-ID: <20091106082131.7CD61168435@codespeak.net> Author: scoder Date: Fri Nov 6 09:21:31 2009 New Revision: 69021 Modified: lxml/trunk/ (props changed) lxml/trunk/INSTALL.txt Log: r5310 at delle: sbehnel | 2009-11-06 09:21:26 +0100 clarification in installation instructions Modified: lxml/trunk/INSTALL.txt 
============================================================================== --- lxml/trunk/INSTALL.txt (original) +++ lxml/trunk/INSTALL.txt Fri Nov 6 09:21:31 2009 @@ -1,12 +1,17 @@ Installing lxml =============== +For special installation instructions regarding MS Windows and +MacOS-X, see below. + Requirements ------------ You need Python 2.3 or later. -You need libxml2 and libxslt, in particular: +Unless you are using a static binary distribution (e.g. a Windows +binary egg from PyPI), you need to install libxml2 and libxslt, in +particular: * libxml 2.6.21 or later. It can be found here: http://xmlsoft.org/downloads.html @@ -25,16 +30,35 @@ Installation ------------ -If you have easy_install_, you can run the following as super-user (or +Get the `easy_install`_ tool and run the following as super-user (or administrator):: easy_install lxml .. _easy_install: http://peak.telecommunity.com/DevCenter/EasyInstall -This has been reported to work on Linux, MacOS-X 10.4 and Windows, as long as -libxml2 and libxslt are properly installed (including development packages, -i.e. header files, etc.). +* On **MS Windows**, the above will install the binary builds that we + provide. If there is no binary build of the latest release yet, + please search PyPI_ for the last release that has them and pass that + version to ``easy_install`` like this: + + easy_install lxml==2.2.2 + +* On **Linux** (and most other well-behaved operating systems), + ``easy_install`` will manage to build the source distribution as + long as libxml2 and libxslt are properly installed, including + development packages, i.e. header files, etc. Use your package + management tool to look for packages like ``libxml2-dev`` or + ``libxslt-devel`` if the build fails, and make sure they are + installed. 
+ +* On **MacOS-X**, use the following to build the source distribution, + and make sure you have a working Internet connection, as this will + download libxml2 and libxslt in order to build them:: + + STATIC_DEPS=true easy_install lxml + +.. _PyPI: http://cheeseshop.python.org/pypi/lxml Building lxml from sources @@ -65,9 +89,14 @@ no need to install the external libraries if you use an official lxml build from PyPI. -If you want to upgrade the libraries and/or compile lxml from sources, you -should install a `binary distribution`_ of libxml2 and libxslt. You need both -libxml2 and libxslt, as well as iconv and zlib. +Unless you know what you are doing, this means: *do not install +libxml2 or libxslt if you use a binary build of lxml*. Just use +``easy_install`` by following the installation instructions above. + +*Only* if you want to upgrade the libraries and/or compile lxml from +sources, you should install a `binary distribution`_ of libxml2 and +libxslt. You need both libxml2 and libxslt, as well as iconv and +zlib. .. _`binary distribution`: http://www.zlatkovic.com/libxml.en.html .. _`binary egg distribution of lxml`: http://cheeseshop.python.org/pypi/lxml @@ -76,17 +105,25 @@ MacOS-X ------- -The system libraries of libxml2 and libxslt installed under MacOS-X -tend to be rather outdated. In any case, they are older than the -required versions for lxml 2.x, so you will have a hard time getting -lxml to work without installing newer libraries. - -A number of users reported success with updated libraries (e.g. using -fink_ or macports), but needed to set the runtime environment variable -``DYLD_LIBRARY_PATH`` to the directory where fink keeps the libraries. -See the `FAQ entry on MacOS-X`_ for more information. +A macport of lxml is available. Try ``port install py25-lxml``. + +If you want to use a more recent lxml release, you may have to build +it yourself. 
Apple doesn't help here, as the system libraries of +libxml2 and libxslt installed under MacOS-X are horribly outdated, and +updating them is everything but easy. In any case, you cannot run +lxml 2.x with the system provided libraries, so you have to use newer +libraries. + +Luckily, lxml's ``setup.py`` script has built-in support for building +and integrating these libraries statically during the build. Please +read the `MacOS-X build instructions`_. +.. _`MacOS-X build instructions`: build.html#building-lxml-on-macos-x .. _fink: http://finkproject.org/ -.. _`FAQ entry on MacOS-X`: FAQ.html#my-application-crashes-on-macos-x -A macport of lxml is available. Try ``port install py25-lxml``. +A number of users also reported success with updated libraries +(e.g. using fink_ or macports), but needed to set the runtime +environment variable ``DYLD_LIBRARY_PATH`` to the directory where fink +keeps the libraries. In any case, this method is easy to get wrong +and everything but safe. Unless you know what you are doing, follow +the static build instructions above. From scoder at codespeak.net Fri Nov 6 09:22:56 2009 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 6 Nov 2009 09:22:56 +0100 (CET) Subject: [Lxml-checkins] r69022 - lxml/branch/lxml-2.2 Message-ID: <20091106082256.13A81168435@codespeak.net> Author: scoder Date: Fri Nov 6 09:22:55 2009 New Revision: 69022 Modified: lxml/branch/lxml-2.2/INSTALL.txt (contents, props changed) Log: trunk merge: docs Modified: lxml/branch/lxml-2.2/INSTALL.txt ============================================================================== --- lxml/branch/lxml-2.2/INSTALL.txt (original) +++ lxml/branch/lxml-2.2/INSTALL.txt Fri Nov 6 09:22:55 2009 @@ -1,12 +1,17 @@ Installing lxml =============== +For special installation instructions regarding MS Windows and +MacOS-X, see below. + Requirements ------------ You need Python 2.3 or later. 
-You need libxml2 and libxslt, in particular: +Unless you are using a static binary distribution (e.g. a Windows +binary egg from PyPI), you need to install libxml2 and libxslt, in +particular: * libxml 2.6.21 or later. It can be found here: http://xmlsoft.org/downloads.html @@ -25,16 +30,35 @@ Installation ------------ -If you have easy_install_, you can run the following as super-user (or +Get the `easy_install`_ tool and run the following as super-user (or administrator):: easy_install lxml .. _easy_install: http://peak.telecommunity.com/DevCenter/EasyInstall -This has been reported to work on Linux, MacOS-X 10.4 and Windows, as long as -libxml2 and libxslt are properly installed (including development packages, -i.e. header files, etc.). +* On **MS Windows**, the above will install the binary builds that we + provide. If there is no binary build of the latest release yet, + please search PyPI_ for the last release that has them and pass that + version to ``easy_install`` like this: + + easy_install lxml==2.2.2 + +* On **Linux** (and most other well-behaved operating systems), + ``easy_install`` will manage to build the source distribution as + long as libxml2 and libxslt are properly installed, including + development packages, i.e. header files, etc. Use your package + management tool to look for packages like ``libxml2-dev`` or + ``libxslt-devel`` if the build fails, and make sure they are + installed. + +* On **MacOS-X**, use the following to build the source distribution, + and make sure you have a working Internet connection, as this will + download libxml2 and libxslt in order to build them:: + + STATIC_DEPS=true easy_install lxml + +.. _PyPI: http://cheeseshop.python.org/pypi/lxml Building lxml from sources @@ -65,9 +89,14 @@ no need to install the external libraries if you use an official lxml build from PyPI. -If you want to upgrade the libraries and/or compile lxml from sources, you -should install a `binary distribution`_ of libxml2 and libxslt. 
You need both -libxml2 and libxslt, as well as iconv and zlib. +Unless you know what you are doing, this means: *do not install +libxml2 or libxslt if you use a binary build of lxml*. Just use +``easy_install`` by following the installation instructions above. + +*Only* if you want to upgrade the libraries and/or compile lxml from +sources, you should install a `binary distribution`_ of libxml2 and +libxslt. You need both libxml2 and libxslt, as well as iconv and +zlib. .. _`binary distribution`: http://www.zlatkovic.com/libxml.en.html .. _`binary egg distribution of lxml`: http://cheeseshop.python.org/pypi/lxml @@ -76,17 +105,25 @@ MacOS-X ------- -The system libraries of libxml2 and libxslt installed under MacOS-X -tend to be rather outdated. In any case, they are older than the -required versions for lxml 2.x, so you will have a hard time getting -lxml to work without installing newer libraries. - -A number of users reported success with updated libraries (e.g. using -fink_ or macports), but needed to set the runtime environment variable -``DYLD_LIBRARY_PATH`` to the directory where fink keeps the libraries. -See the `FAQ entry on MacOS-X`_ for more information. +A macport of lxml is available. Try ``port install py25-lxml``. + +If you want to use a more recent lxml release, you may have to build +it yourself. Apple doesn't help here, as the system libraries of +libxml2 and libxslt installed under MacOS-X are horribly outdated, and +updating them is everything but easy. In any case, you cannot run +lxml 2.x with the system provided libraries, so you have to use newer +libraries. + +Luckily, lxml's ``setup.py`` script has built-in support for building +and integrating these libraries statically during the build. Please +read the `MacOS-X build instructions`_. +.. _`MacOS-X build instructions`: build.html#building-lxml-on-macos-x .. _fink: http://finkproject.org/ -.. 
_`FAQ entry on MacOS-X`: FAQ.html#my-application-crashes-on-macos-x -A macport of lxml is available. Try ``port install py25-lxml``. +A number of users also reported success with updated libraries +(e.g. using fink_ or macports), but needed to set the runtime +environment variable ``DYLD_LIBRARY_PATH`` to the directory where fink +keeps the libraries. In any case, this method is easy to get wrong +and everything but safe. Unless you know what you are doing, follow +the static build instructions above. From scoder at codespeak.net Fri Nov 6 09:27:46 2009 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 6 Nov 2009 09:27:46 +0100 (CET) Subject: [Lxml-checkins] r69023 - lxml/trunk Message-ID: <20091106082746.8B3E0168435@codespeak.net> Author: scoder Date: Fri Nov 6 09:27:46 2009 New Revision: 69023 Modified: lxml/trunk/ (props changed) lxml/trunk/INSTALL.txt Log: r5312 at delle: sbehnel | 2009-11-06 09:27:41 +0100 typo Modified: lxml/trunk/INSTALL.txt ============================================================================== --- lxml/trunk/INSTALL.txt (original) +++ lxml/trunk/INSTALL.txt Fri Nov 6 09:27:46 2009 @@ -40,7 +40,7 @@ * On **MS Windows**, the above will install the binary builds that we provide. 
If there is no binary build of the latest release yet, please search PyPI_ for the last release that has them and pass that - version to ``easy_install`` like this: + version to ``easy_install`` like this:: easy_install lxml==2.2.2 From scoder at codespeak.net Fri Nov 6 09:28:08 2009 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 6 Nov 2009 09:28:08 +0100 (CET) Subject: [Lxml-checkins] r69024 - lxml/branch/lxml-2.2 Message-ID: <20091106082808.29B11168435@codespeak.net> Author: scoder Date: Fri Nov 6 09:28:07 2009 New Revision: 69024 Modified: lxml/branch/lxml-2.2/INSTALL.txt (contents, props changed) Log: trunk merge: docs Modified: lxml/branch/lxml-2.2/INSTALL.txt ============================================================================== --- lxml/branch/lxml-2.2/INSTALL.txt (original) +++ lxml/branch/lxml-2.2/INSTALL.txt Fri Nov 6 09:28:07 2009 @@ -40,7 +40,7 @@ * On **MS Windows**, the above will install the binary builds that we provide. If there is no binary build of the latest release yet, please search PyPI_ for the last release that has them and pass that - version to ``easy_install`` like this: + version to ``easy_install`` like this:: easy_install lxml==2.2.2 From scoder at codespeak.net Wed Nov 11 16:30:05 2009 From: scoder at codespeak.net (scoder at codespeak.net) Date: Wed, 11 Nov 2009 16:30:05 +0100 (CET) Subject: [Lxml-checkins] r69175 - lxml/branch/lxml-2.2 Message-ID: <20091111153005.0BEBE1680FE@codespeak.net> Author: scoder Date: Wed Nov 11 16:30:04 2009 New Revision: 69175 Modified: lxml/branch/lxml-2.2/setupinfo.py Log: fix static build Modified: lxml/branch/lxml-2.2/setupinfo.py ============================================================================== --- lxml/branch/lxml-2.2/setupinfo.py (original) +++ lxml/branch/lxml-2.2/setupinfo.py Wed Nov 11 16:30:04 2009 @@ -46,7 +46,6 @@ 'libs', 'build/tmp', static_include_dirs, static_library_dirs, static_cflags, static_binaries, - libiconv_version=OPTION_LIBICONV_VERSION, 
libxml2_version=OPTION_LIBXML2_VERSION, libxslt_version=OPTION_LIBXSLT_VERSION) if CYTHON_INSTALLED: @@ -338,4 +337,3 @@ OPTION_STATIC = True OPTION_LIBXML2_VERSION = option_value('libxml2-version') OPTION_LIBXSLT_VERSION = option_value('libxslt-version') -OPTION_LIBICONV_VERSION = option_value('libiconv-version') From scoder at codespeak.net Wed Nov 11 16:30:26 2009 From: scoder at codespeak.net (scoder at codespeak.net) Date: Wed, 11 Nov 2009 16:30:26 +0100 (CET) Subject: [Lxml-checkins] r69176 - in lxml/branch/lxml-2.2: . doc Message-ID: <20091111153026.A3A381680FE@codespeak.net> Author: scoder Date: Wed Nov 11 16:30:26 2009 New Revision: 69176 Modified: lxml/branch/lxml-2.2/CHANGES.txt lxml/branch/lxml-2.2/doc/main.txt lxml/branch/lxml-2.2/version.txt Log: prepare release of 2.2.4 Modified: lxml/branch/lxml-2.2/CHANGES.txt ============================================================================== --- lxml/branch/lxml-2.2/CHANGES.txt (original) +++ lxml/branch/lxml-2.2/CHANGES.txt Wed Nov 11 16:30:26 2009 @@ -2,6 +2,15 @@ lxml changelog ============== +2.2.4 (2009-11-11) +================== + +Bugs fixed +---------- + +* Static build of libxml2/libxslt was broken. + + 2.2.3 (2009-10-30) ================== Modified: lxml/branch/lxml-2.2/doc/main.txt ============================================================================== --- lxml/branch/lxml-2.2/doc/main.txt (original) +++ lxml/branch/lxml-2.2/doc/main.txt Wed Nov 11 16:30:26 2009 @@ -147,8 +147,8 @@ source release. If you can't wait, consider trying a less recent release version first. -The latest version is `lxml 2.2.3`_, released 2009-10-30 -(`changes for 2.2.3`_). `Older versions`_ are listed below. +The latest version is `lxml 2.2.4`_, released 2009-10-30 +(`changes for 2.2.4`_). `Older versions`_ are listed below. Please take a look at the `installation instructions`_! @@ -221,7 +221,9 @@ `_ and the `current in-development version `_. -.. _`PDF documentation`: lxmldoc-2.2.3.pdf +.. 
_`PDF documentation`: lxmldoc-2.2.4.pdf + +* `lxml 2.2.3`_, released 2009-10-30 (`changes for 2.2.3`_) * `lxml 2.2.2`_, released 2009-06-21 (`changes for 2.2.2`_) @@ -325,6 +327,7 @@ * `lxml 0.5`_, released 2005-04-08 +.. _`lxml 2.2.4`: lxml-2.2.4.tgz .. _`lxml 2.2.3`: lxml-2.2.3.tgz .. _`lxml 2.2.2`: lxml-2.2.2.tgz .. _`lxml 2.2.1`: lxml-2.2.1.tgz @@ -377,6 +380,7 @@ .. _`lxml 0.5.1`: lxml-0.5.1.tgz .. _`lxml 0.5`: lxml-0.5.tgz +.. _`changes for 2.2.4`: changes-2.2.4.html .. _`changes for 2.2.3`: changes-2.2.3.html .. _`changes for 2.2.2`: changes-2.2.2.html .. _`changes for 2.2.1`: changes-2.2.1.html Modified: lxml/branch/lxml-2.2/version.txt ============================================================================== --- lxml/branch/lxml-2.2/version.txt (original) +++ lxml/branch/lxml-2.2/version.txt Wed Nov 11 16:30:26 2009 @@ -1 +1 @@ -2.2.3 +2.2.4 From scoder at codespeak.net Wed Nov 11 16:31:33 2009 From: scoder at codespeak.net (scoder at codespeak.net) Date: Wed, 11 Nov 2009 16:31:33 +0100 (CET) Subject: [Lxml-checkins] r69177 - lxml/tag/lxml-2.2.4 Message-ID: <20091111153133.6D9051680FE@codespeak.net> Author: scoder Date: Wed Nov 11 16:31:32 2009 New Revision: 69177 Added: lxml/tag/lxml-2.2.4/ - copied from r69176, lxml/branch/lxml-2.2/ Log: tag for 2.2.4 From scoder at codespeak.net Wed Nov 11 21:42:54 2009 From: scoder at codespeak.net (scoder at codespeak.net) Date: Wed, 11 Nov 2009 21:42:54 +0100 (CET) Subject: [Lxml-checkins] r69203 - lxml/branch/lxml-2.2 Message-ID: <20091111204254.4A03516802D@codespeak.net> Author: scoder Date: Wed Nov 11 21:42:52 2009 New Revision: 69203 Modified: lxml/branch/lxml-2.2/INSTALL.txt Log: docs: install in ActivePython Modified: lxml/branch/lxml-2.2/INSTALL.txt ============================================================================== --- lxml/branch/lxml-2.2/INSTALL.txt (original) +++ lxml/branch/lxml-2.2/INSTALL.txt Wed Nov 11 21:42:52 2009 @@ -4,6 +4,15 @@ For special installation instructions regarding MS 
Windows and MacOS-X, see below. +.. contents:: + 1 Requirements + 2 Installation + 3 Installation in ActivePython + 4 Building lxml from sources + 5 MS Windows + 6 MacOS-X + + Requirements ------------ @@ -61,6 +70,30 @@ .. _PyPI: http://cheeseshop.python.org/pypi/lxml +Installation in ActivePython +---------------------------- + +ActiveState_ provides ready-made lxml builds for different platforms +in its `package repository`_ for the PyPM_ package manager. PyPM is +similar to apt-get in that there is a repository of automaticaly +pre-built packages for Windows, Mac and Linux. + +To install lxml in ActivePython, type the following on one of these +operating systems:: + + $ pypm install lxml + +To test the installation, try:: + + $ python -c "import lxml; print lxml.__file__" + +This should show you the directory where the package was installed. + +.. _ActiveState: http://www.activestate.com/ +.. _PyPM: http://docs.activestate.com/activepython/2.6/pypm.html +.. _`package repository`: http://pypm.activestate.com/ + + Building lxml from sources -------------------------- From scoder at codespeak.net Wed Nov 11 21:44:56 2009 From: scoder at codespeak.net (scoder at codespeak.net) Date: Wed, 11 Nov 2009 21:44:56 +0100 (CET) Subject: [Lxml-checkins] r69204 - in lxml/branch/lxml-2.2: . src/lxml Message-ID: <20091111204456.9A5AE16802C@codespeak.net> Author: scoder Date: Wed Nov 11 21:44:56 2009 New Revision: 69204 Modified: lxml/branch/lxml-2.2/CHANGES.txt lxml/branch/lxml-2.2/src/lxml/xsltext.pxi Log: fix XSLT extension elements when running in a non-element XSLT context Modified: lxml/branch/lxml-2.2/CHANGES.txt ============================================================================== --- lxml/branch/lxml-2.2/CHANGES.txt (original) +++ lxml/branch/lxml-2.2/CHANGES.txt Wed Nov 11 21:44:56 2009 @@ -2,6 +2,22 @@ lxml changelog ============== +2.2.5 (?) 
+================== + +Features added +-------------- + +* Support for running XSLT extension elements on the input root node + (e.g. in a template matching on "/"). + +Bugs fixed +---------- + +* Crash in XSLT extension elements when the XSLT context node is not + an element. + + 2.2.4 (2009-11-11) ================== Modified: lxml/branch/lxml-2.2/src/lxml/xsltext.pxi ============================================================================== --- lxml/branch/lxml-2.2/src/lxml/xsltext.pxi (original) +++ lxml/branch/lxml-2.2/src/lxml/xsltext.pxi Wed Nov 11 21:44:56 2009 @@ -81,6 +81,7 @@ cdef XSLTExtension extension cdef python.PyObject* dict_result cdef char* c_uri + cdef xmlNode* c_node cdef _ReadOnlyElementProxy context_node, self_node, output_parent c_uri = _getNs(c_inst_node) if c_uri is NULL: @@ -100,9 +101,21 @@ try: self_node = _newReadOnlyProxy(None, c_inst_node) - context_node = _newReadOnlyProxy(self_node, c_context_node) output_parent = _newAppendOnlyProxy(self_node, c_ctxt.insert) + if c_context_node.type == tree.XML_ELEMENT_NODE: + context_node = _newReadOnlyProxy(self_node, c_context_node) + elif c_context_node.type in (tree.XML_DOCUMENT_NODE, + tree.XML_HTML_DOCUMENT_NODE): + c_node = tree.xmlDocGetRootElement(c_context_node) + if c_node is not NULL: + context_node = _newReadOnlyProxy(self_node, c_node) + else: + context_node = None + else: + raise TypeError, \ + u"Unsupported XSLT context node type %d, extension elements require an element" % c_context_node.type + context._extension_element_proxy = self_node extension.execute(context, self_node, context_node, output_parent) finally: From scoder at codespeak.net Wed Nov 11 21:50:04 2009 From: scoder at codespeak.net (scoder at codespeak.net) Date: Wed, 11 Nov 2009 21:50:04 +0100 (CET) Subject: [Lxml-checkins] r69205 - lxml/branch/lxml-2.2/doc Message-ID: <20091111205004.8D70116802C@codespeak.net> Author: scoder Date: Wed Nov 11 21:50:03 2009 New Revision: 69205 Modified: 
lxml/branch/lxml-2.2/doc/main.txt Log: fix release date Modified: lxml/branch/lxml-2.2/doc/main.txt ============================================================================== --- lxml/branch/lxml-2.2/doc/main.txt (original) +++ lxml/branch/lxml-2.2/doc/main.txt Wed Nov 11 21:50:03 2009 @@ -147,7 +147,7 @@ source release. If you can't wait, consider trying a less recent release version first. -The latest version is `lxml 2.2.4`_, released 2009-10-30 +The latest version is `lxml 2.2.4`_, released 2009-11-11 (`changes for 2.2.4`_). `Older versions`_ are listed below. Please take a look at the `installation instructions`_! From scoder at codespeak.net Wed Nov 11 21:51:50 2009 From: scoder at codespeak.net (scoder at codespeak.net) Date: Wed, 11 Nov 2009 21:51:50 +0100 (CET) Subject: [Lxml-checkins] r69206 - lxml/trunk Message-ID: <20091111205150.3FB6E168032@codespeak.net> Author: scoder Date: Wed Nov 11 21:51:49 2009 New Revision: 69206 Modified: lxml/trunk/ (props changed) lxml/trunk/INSTALL.txt Log: doc merge from 2.2 branch Modified: lxml/trunk/INSTALL.txt ============================================================================== --- lxml/trunk/INSTALL.txt (original) +++ lxml/trunk/INSTALL.txt Wed Nov 11 21:51:49 2009 @@ -4,6 +4,15 @@ For special installation instructions regarding MS Windows and MacOS-X, see below. +.. contents:: + 1 Requirements + 2 Installation + 3 Installation in ActivePython + 4 Building lxml from sources + 5 MS Windows + 6 MacOS-X + + Requirements ------------ @@ -61,6 +70,30 @@ .. _PyPI: http://cheeseshop.python.org/pypi/lxml +Installation in ActivePython +---------------------------- + +ActiveState_ provides ready-made lxml builds for different platforms +in its `package repository`_ for the PyPM_ package manager. PyPM is +similar to apt-get in that there is a repository of automaticaly +pre-built packages for Windows, Mac and Linux. 
+ +To install lxml in ActivePython, type the following on one of these +operating systems:: + + $ pypm install lxml + +To test the installation, try:: + + $ python -c "import lxml; print lxml.__file__" + +This should show you the directory where the package was installed. + +.. _ActiveState: http://www.activestate.com/ +.. _PyPM: http://docs.activestate.com/activepython/2.6/pypm.html +.. _`package repository`: http://pypm.activestate.com/ + + Building lxml from sources -------------------------- From scoder at codespeak.net Wed Nov 11 22:12:23 2009 From: scoder at codespeak.net (scoder at codespeak.net) Date: Wed, 11 Nov 2009 22:12:23 +0100 (CET) Subject: [Lxml-checkins] r69207 - lxml/branch/lxml-2.2/src/lxml/tests Message-ID: <20091111211223.E71C716802D@codespeak.net> Author: scoder Date: Wed Nov 11 22:12:22 2009 New Revision: 69207 Modified: lxml/branch/lxml-2.2/src/lxml/tests/test_xslt.py Log: additional tests for XSLT extension elements running in non-element contexts Modified: lxml/branch/lxml-2.2/src/lxml/tests/test_xslt.py ============================================================================== --- lxml/branch/lxml-2.2/src/lxml/tests/test_xslt.py (original) +++ lxml/branch/lxml-2.2/src/lxml/tests/test_xslt.py Wed Nov 11 22:12:22 2009 @@ -721,6 +721,54 @@ self.assertEquals(self._rootstring(result), _bytes('X')) + def test_extension_element_doc_context(self): + tree = self.parse('B') + style = self.parse('''\ + + + b + +''') + + tags = [] + + class MyExt(etree.XSLTExtension): + def execute(self, context, self_node, input_node, output_parent): + tags.append(input_node.tag) + + extensions = { ('testns', 'myext') : MyExt() } + + result = tree.xslt(style, extensions=extensions) + self.assertEquals(tags, ['a']) + + def test_extension_element_unsupported_context(self): + tree = self.parse('') + style = self.parse('''\ + + + b + +''') + + tags = [] + + class MyExt(etree.XSLTExtension): + def execute(self, context, self_node, input_node, output_parent): + 
tags.append(input_node.tag) + + extensions = { ('testns', 'myext') : MyExt() } + + self.assertRaises(TypeError, tree.xslt, style, extensions=extensions) + self.assertEquals(tags, []) + def test_extension_element_content(self): tree = self.parse('B') style = self.parse('''\ From scoder at codespeak.net Fri Nov 13 08:33:54 2009 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 13 Nov 2009 08:33:54 +0100 (CET) Subject: [Lxml-checkins] r69245 - lxml/branch/lxml-2.2 Message-ID: <20091113073354.4E7B7168026@codespeak.net> Author: scoder Date: Fri Nov 13 08:33:53 2009 New Revision: 69245 Modified: lxml/branch/lxml-2.2/INSTALL.txt Log: doc fix Modified: lxml/branch/lxml-2.2/INSTALL.txt ============================================================================== --- lxml/branch/lxml-2.2/INSTALL.txt (original) +++ lxml/branch/lxml-2.2/INSTALL.txt Fri Nov 13 08:33:53 2009 @@ -5,6 +5,7 @@ MacOS-X, see below. .. contents:: +.. 1 Requirements 2 Installation 3 Installation in ActivePython From scoder at codespeak.net Tue Nov 17 15:20:46 2009 From: scoder at codespeak.net (scoder at codespeak.net) Date: Tue, 17 Nov 2009 15:20:46 +0100 (CET) Subject: [Lxml-checkins] r69351 - lxml/branch/lxml-2.2 Message-ID: <20091117142046.9FE2D318139@codespeak.net> Author: scoder Date: Tue Nov 17 15:20:46 2009 New Revision: 69351 Modified: lxml/branch/lxml-2.2/buildlibxml.py Log: static build fix for MacOS 10.6 Modified: lxml/branch/lxml-2.2/buildlibxml.py ============================================================================== --- lxml/branch/lxml-2.2/buildlibxml.py (original) +++ lxml/branch/lxml-2.2/buildlibxml.py Tue Nov 17 15:20:46 2009 @@ -175,15 +175,23 @@ call_setup = {} env_setup = None if sys.platform in ('darwin',): + import platform # We compile Universal if we are on a machine > 10.3 - major_version = int(os.uname()[2].split('.')[0]) + major_version, minor_version = map(int, platform.mac_ver()[0].split('.')[:2]) if major_version > 7: env = os.environ.copy() - 
env.update({ - 'CFLAGS' : "-arch ppc -arch i386 -isysroot /Developer/SDKs/MacOSX10.4u.sdk -O2", - 'LDFLAGS' : "-arch ppc -arch i386 -isysroot /Developer/SDKs/MacOSX10.4u.sdk", - 'MACOSX_DEPLOYMENT_TARGET' : "10.3" - }) + if minor_version < 6: + env.update({ + 'CFLAGS' : "-arch ppc -arch i386 -isysroot /Developer/SDKs/MacOSX10.4u.sdk -O2", + 'LDFLAGS' : "-arch ppc -arch i386 -isysroot /Developer/SDKs/MacOSX10.4u.sdk", + 'MACOSX_DEPLOYMENT_TARGET' : "10.3" + }) + else: + env.update({ + 'CFLAGS' : "-arch ppc -arch i386 -arch x86_64 -O2", + 'LDFLAGS' : "-arch ppc -arch i386 -arch x86_64", + 'MACOSX_DEPLOYMENT_TARGET' : "10.6" + }) call_setup['env'] = env # We may loose the link to iconv, so make sure it's there From scoder at codespeak.net Tue Nov 17 20:15:01 2009 From: scoder at codespeak.net (scoder at codespeak.net) Date: Tue, 17 Nov 2009 20:15:01 +0100 (CET) Subject: [Lxml-checkins] r69357 - lxml/branch/lxml-2.2/doc Message-ID: <20091117191501.4E0C131813D@codespeak.net> Author: scoder Date: Tue Nov 17 20:14:59 2009 New Revision: 69357 Modified: lxml/branch/lxml-2.2/doc/build.txt Log: mac build docs: show how to install Modified: lxml/branch/lxml-2.2/doc/build.txt ============================================================================== --- lxml/branch/lxml-2.2/doc/build.txt (original) +++ lxml/branch/lxml-2.2/doc/build.txt Tue Nov 17 20:14:59 2009 @@ -209,6 +209,8 @@ --libxml2-version=2.7.3 \ --libxslt-version=1.1.24 \ + sudo python setup.py install + Instead of ``build``, you can use any target, like ``bdist_egg`` if you want to use setuptools to build an installable egg. 
From scoder at codespeak.net Sun Nov 22 04:37:55 2009 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sun, 22 Nov 2009 04:37:55 +0100 (CET) Subject: [Lxml-checkins] r69500 - lxml/branch/lxml-2.2 Message-ID: <20091122033755.59A3E168026@codespeak.net> Author: scoder Date: Sun Nov 22 04:37:53 2009 New Revision: 69500 Modified: lxml/branch/lxml-2.2/test.py Log: test runner fix for Py2.7/3.2 Modified: lxml/branch/lxml-2.2/test.py ============================================================================== --- lxml/branch/lxml-2.2/test.py (original) +++ lxml/branch/lxml-2.2/test.py Sun Nov 22 04:37:53 2009 @@ -71,11 +71,18 @@ import getopt import unittest import traceback + try: set except NameError: from sets import Set as set +try: + # Python >=2.7 and >=3.2 + from unittest.runner import _TextTestResult +except ImportError: + from unittest import _TextTestResult + __metaclass__ = type def stderr(text): @@ -302,14 +309,14 @@ return results -class CustomTestResult(unittest._TextTestResult): +class CustomTestResult(_TextTestResult): """Customised TestResult. It can show a progress bar, and displays tracebacks for errors and failures as soon as they happen, in addition to listing them all at the end. 
""" - __super = unittest._TextTestResult + __super = _TextTestResult __super_init = __super.__init__ __super_startTest = __super.startTest __super_stopTest = __super.stopTest From scoder at codespeak.net Tue Nov 24 18:58:44 2009 From: scoder at codespeak.net (scoder at codespeak.net) Date: Tue, 24 Nov 2009 18:58:44 +0100 (CET) Subject: [Lxml-checkins] r69597 - lxml/trunk Message-ID: <20091124175844.BF4D1168006@codespeak.net> Author: scoder Date: Tue Nov 24 18:58:42 2009 New Revision: 69597 Modified: lxml/trunk/ (props changed) lxml/trunk/INSTALL.txt Log: r5315 at delle: sbehnel | 2009-11-11 15:41:12 +0100 doc clarification Modified: lxml/trunk/INSTALL.txt ============================================================================== --- lxml/trunk/INSTALL.txt (original) +++ lxml/trunk/INSTALL.txt Tue Nov 24 18:58:42 2009 @@ -2,7 +2,7 @@ =============== For special installation instructions regarding MS Windows and -MacOS-X, see below. +MacOS-X, see the specific sections below. .. contents:: 1 Requirements @@ -65,7 +65,7 @@ and make sure you have a working Internet connection, as this will download libxml2 and libxslt in order to build them:: - STATIC_DEPS=true easy_install lxml + STATIC_DEPS=true sudo easy_install lxml .. _PyPI: http://cheeseshop.python.org/pypi/lxml From scoder at codespeak.net Tue Nov 24 18:58:53 2009 From: scoder at codespeak.net (scoder at codespeak.net) Date: Tue, 24 Nov 2009 18:58:53 +0100 (CET) Subject: [Lxml-checkins] r69598 - in lxml/trunk: . 
src/lxml Message-ID: <20091124175853.9376D168006@codespeak.net> Author: scoder Date: Tue Nov 24 18:58:52 2009 New Revision: 69598 Modified: lxml/trunk/ (props changed) lxml/trunk/src/lxml/apihelpers.pxi lxml/trunk/src/lxml/etree_defs.h lxml/trunk/src/lxml/iterparse.pxi lxml/trunk/src/lxml/lxml.objectify.pyx lxml/trunk/src/lxml/objectpath.pxi lxml/trunk/src/lxml/parser.pxi lxml/trunk/src/lxml/python.pxd lxml/trunk/src/lxml/serializer.pxi lxml/trunk/src/lxml/xpath.pxi lxml/trunk/src/lxml/xslt.pxi Log: r5316 at delle: sbehnel | 2009-11-11 17:26:12 +0100 use PyBytes_*() instead of PyString_*() Modified: lxml/trunk/src/lxml/apihelpers.pxi ============================================================================== --- lxml/trunk/src/lxml/apihelpers.pxi (original) +++ lxml/trunk/src/lxml/apihelpers.pxi Tue Nov 24 18:58:52 2009 @@ -652,7 +652,7 @@ elif isinstance(value, CDATA): c_text_node = tree.xmlNewCDataBlock( c_node.doc, _cstr((value)._utf8_data), - python.PyString_GET_SIZE((value)._utf8_data)) + python.PyBytes_GET_SIZE((value)._utf8_data)) else: # this will raise the right error _utf8(value) @@ -683,7 +683,7 @@ else: c_ns = element._doc._findOrBuildNodeNs( element._c_node, _cstr(ns), NULL) - return python.PyString_FromFormat('%s:%s', c_ns.prefix, _cstr(tag)) + return python.PyBytes_FromFormat('%s:%s', c_ns.prefix, _cstr(tag)) cdef inline bint _hasChild(xmlNode* c_node): return c_node is not NULL and _findChildForwards(c_node, 0) is not NULL @@ -1197,7 +1197,7 @@ cdef char c cdef bint is_non_ascii s = _cstr(pystring) - c_end = s + python.PyString_GET_SIZE(pystring) + c_end = s + python.PyBytes_GET_SIZE(pystring) is_non_ascii = 0 while s < c_end: if s[0] & 0x80: @@ -1229,7 +1229,7 @@ slen = spos - s if is_non_ascii: return python.PyUnicode_DecodeUTF8(s, slen, NULL) - return python.PyString_FromStringAndSize(s, slen) + return python.PyBytes_FromStringAndSize(s, slen) cdef object _utf8(object s): cdef int invalid @@ -1321,13 +1321,13 @@ try: # try to decode with 
default encoding filename = python.PyUnicode_Decode( - c_filename, python.PyString_GET_SIZE(filename), + c_filename, python.PyBytes_GET_SIZE(filename), _C_FILENAME_ENCODING, NULL) except UnicodeDecodeError, decode_exc: try: # try if it's UTF-8 filename = python.PyUnicode_DecodeUTF8( - c_filename, python.PyString_GET_SIZE(filename), NULL) + c_filename, python.PyBytes_GET_SIZE(filename), NULL) except UnicodeDecodeError: raise decode_exc # otherwise re-raise original exception if python.PyUnicode_Check(filename): @@ -1354,13 +1354,13 @@ if c_ns_end is NULL: raise ValueError, u"Invalid tag name" nslen = c_ns_end - c_tag - taglen = python.PyString_GET_SIZE(tag) - nslen - 2 + taglen = python.PyBytes_GET_SIZE(tag) - nslen - 2 if taglen == 0: raise ValueError, u"Empty tag name" if nslen > 0: - ns = python.PyString_FromStringAndSize(c_tag, nslen) - tag = python.PyString_FromStringAndSize(c_ns_end+1, taglen) - elif python.PyString_GET_SIZE(tag) == 0: + ns = python.PyBytes_FromStringAndSize(c_tag, nslen) + tag = python.PyBytes_FromStringAndSize(c_ns_end+1, taglen) + elif python.PyBytes_GET_SIZE(tag) == 0: raise ValueError, u"Empty tag name" return ns, tag @@ -1445,7 +1445,7 @@ elif python.IS_PYTHON3: return python.PyUnicode_FromFormat("{%s}%s", href, name) else: - s = python.PyString_FromFormat("{%s}%s", href, name) + s = python.PyBytes_FromFormat("{%s}%s", href, name) if isutf8(href) or isutf8(name): return python.PyUnicode_FromEncodedObject(s, 'UTF-8', NULL) else: Modified: lxml/trunk/src/lxml/etree_defs.h ============================================================================== --- lxml/trunk/src/lxml/etree_defs.h (original) +++ lxml/trunk/src/lxml/etree_defs.h Tue Nov 24 18:58:52 2009 @@ -36,6 +36,12 @@ # define PyString_GET_SIZE(s) PyBytes_GET_SIZE(s) # define PyString_AS_STRING(s) PyBytes_AS_STRING(s) #else +# define PyBytes_CheckExact(o) PyString_CheckExact(o) +# define PyBytes_Check(o) PyString_Check(o) +# define PyBytes_FromStringAndSize(s, len) 
PyString_FromStringAndSize(s, len) +# define PyBytes_FromFormat PyString_FromFormat +# define PyBytes_GET_SIZE(s) PyString_GET_SIZE(s) +# define PyBytes_AS_STRING(s) PyString_AS_STRING(s) #if PY_VERSION_HEX < 0x02060000 /* we currently only use three parameters - MSVC can't compile (s, ...) */ # define PyUnicode_FromFormat(s, a, b) (NULL) Modified: lxml/trunk/src/lxml/iterparse.pxi ============================================================================== --- lxml/trunk/src/lxml/iterparse.pxi (original) +++ lxml/trunk/src/lxml/iterparse.pxi Tue Nov 24 18:58:52 2009 @@ -482,13 +482,13 @@ if not python.PyBytes_Check(data): self._source = None raise TypeError, u"reading file objects must return plain strings" - c_data_len = python.PyString_GET_SIZE(data) + c_data_len = python.PyBytes_GET_SIZE(data) c_data = _cstr(data) done = (c_data_len == 0) error = self._parse_chunk(pctxt, c_data, c_data_len, done) else: if self._buffer is None: - self._buffer = python.PyString_FromStringAndSize( + self._buffer = python.PyBytes_FromStringAndSize( NULL, __ITERPARSE_CHUNK_SIZE) c_data = _cstr(self._buffer) with nogil: Modified: lxml/trunk/src/lxml/lxml.objectify.pyx ============================================================================== --- lxml/trunk/src/lxml/lxml.objectify.pyx (original) +++ lxml/trunk/src/lxml/lxml.objectify.pyx Tue Nov 24 18:58:52 2009 @@ -466,7 +466,7 @@ c_node = parent._c_node ns, tag = cetree.getNsTag(tag) c_tag = tree.xmlDictExists( - c_node.doc.dict, _cstr(tag), python.PyString_GET_SIZE(tag)) + c_node.doc.dict, _cstr(tag), python.PyBytes_GET_SIZE(tag)) if c_tag is NULL: return None if ns is None: Modified: lxml/trunk/src/lxml/objectpath.pxi ============================================================================== --- lxml/trunk/src/lxml/objectpath.pxi (original) +++ lxml/trunk/src/lxml/objectpath.pxi Tue Nov 24 18:58:52 2009 @@ -153,11 +153,11 @@ if index_end is NULL: raise ValueError, u"index must be enclosed in []" index = int( - 
python.PyString_FromStringAndSize( + python.PyBytes_FromStringAndSize( index_pos + 1, (index_end - index_pos - 1))) if python.PyList_GET_SIZE(new_path) == 0 and index != 0: raise ValueError, u"index not allowed on root node" - name = python.PyString_FromStringAndSize( + name = python.PyBytes_FromStringAndSize( c_name, (index_pos - c_name)) new_path.append( (ns, name, index) ) if python.PyList_GET_SIZE(new_path) == 0: Modified: lxml/trunk/src/lxml/parser.pxi ============================================================================== --- lxml/trunk/src/lxml/parser.pxi (original) +++ lxml/trunk/src/lxml/parser.pxi Tue Nov 24 18:58:52 2009 @@ -347,7 +347,7 @@ return 0 try: c_byte_count = 0 - byte_count = python.PyString_GET_SIZE(self._bytes) + byte_count = python.PyBytes_GET_SIZE(self._bytes) remaining = byte_count - self._bytes_read while c_requested > remaining: c_start = _cstr(self._bytes) + self._bytes_read @@ -368,7 +368,7 @@ raise TypeError, \ u"reading from file-like objects must return byte strings or unicode strings" - remaining = python.PyString_GET_SIZE(self._bytes) + remaining = python.PyBytes_GET_SIZE(self._bytes) if remaining == 0: self._bytes_read = -1 return c_byte_count @@ -435,7 +435,7 @@ c_input = xmlparser.xmlNewInputStream(c_context) if c_input is not NULL: c_input.base = _cstr(data) - c_input.length = python.PyString_GET_SIZE(data) + c_input.length = python.PyBytes_GET_SIZE(data) c_input.cur = c_input.base c_input.end = &c_input.base[c_input.length] elif doc_ref._type == PARSER_DATA_FILENAME: @@ -870,7 +870,7 @@ py_buffer_len = python.PyUnicode_GET_DATA_SIZE(utext) if py_buffer_len > python.INT_MAX or _UNICODE_ENCODING is NULL: text_utf = python.PyUnicode_AsUTF8String(utext) - py_buffer_len = python.PyString_GET_SIZE(text_utf) + py_buffer_len = python.PyBytes_GET_SIZE(text_utf) return self._parseDoc(_cstr(text_utf), py_buffer_len, c_filename) buffer_len = py_buffer_len @@ -1049,7 +1049,7 @@ else: c_encoding = self._default_encoding c_data = 
_cstr(data) - py_buffer_len = python.PyString_GET_SIZE(data) + py_buffer_len = python.PyBytes_GET_SIZE(data) elif python.PyUnicode_Check(data): if _UNICODE_ENCODING is NULL: raise ParserError, \ @@ -1416,7 +1416,7 @@ StringIO(text), filename) return (<_BaseParser>parser)._parseUnicodeDoc(text, c_filename) else: - c_len = python.PyString_GET_SIZE(text) + c_len = python.PyBytes_GET_SIZE(text) if c_len > python.INT_MAX: return (<_BaseParser>parser)._parseDocFromFilelike( BytesIO(text), filename) Modified: lxml/trunk/src/lxml/python.pxd ============================================================================== --- lxml/trunk/src/lxml/python.pxd (original) +++ lxml/trunk/src/lxml/python.pxd Tue Nov 24 18:58:52 2009 @@ -40,9 +40,10 @@ cdef char* PyUnicode_AS_DATA(object ustring) cdef Py_ssize_t PyUnicode_GET_DATA_SIZE(object ustring) cdef Py_ssize_t PyUnicode_GET_SIZE(object ustring) - cdef object PyString_FromStringAndSize(char* s, Py_ssize_t size) - cdef object PyString_FromFormat(char* format, ...) - cdef Py_ssize_t PyString_GET_SIZE(object s) + cdef object PyBytes_FromStringAndSize(char* s, Py_ssize_t size) + cdef object PyBytes_FromFormat(char* format, ...) + cdef object PyString_FromFormat(char* format, ...) # to be deleted! 
+ cdef Py_ssize_t PyBytes_GET_SIZE(object s) cdef object PyNumber_Int(object value) cdef Py_ssize_t PyInt_AsSsize_t(object value) @@ -107,7 +108,7 @@ # some handy functions cdef int callable "PyCallable_Check" (object obj) - cdef char* _cstr "PyString_AS_STRING" (object s) + cdef char* _cstr "PyBytes_AS_STRING" (object s) # Py_buffer related flags cdef int PyBUF_SIMPLE Modified: lxml/trunk/src/lxml/serializer.pxi ============================================================================== --- lxml/trunk/src/lxml/serializer.pxi (original) +++ lxml/trunk/src/lxml/serializer.pxi Tue Nov 24 18:58:52 2009 @@ -130,7 +130,7 @@ tree.xmlBufferLength(c_result_buffer), 'strict') else: - result = python.PyString_FromStringAndSize( + result = python.PyBytes_FromStringAndSize( tree.xmlBufferContent(c_result_buffer), tree.xmlBufferLength(c_result_buffer)) finally: @@ -332,7 +332,7 @@ try: if self._filelike is None: raise IOError, u"File is already closed" - py_buffer = python.PyString_FromStringAndSize(c_buffer, size) + py_buffer = python.PyBytes_FromStringAndSize(c_buffer, size) self._filelike.write(py_buffer) return size except: Modified: lxml/trunk/src/lxml/xpath.pxi ============================================================================== --- lxml/trunk/src/lxml/xpath.pxi (original) +++ lxml/trunk/src/lxml/xpath.pxi Tue Nov 24 18:58:52 2009 @@ -473,7 +473,7 @@ i = 1 for namespace_def in _find_namespaces(stripped_path): if namespace_def not in namespace_defs: - prefix = python.PyString_FromFormat("__xpp%02d", i) + prefix = python.PyBytes_FromFormat("__xpp%02d", i) i += 1 namespace_defs.append(namespace_def) namespace = namespace_def[1:-1] # remove '{}' Modified: lxml/trunk/src/lxml/xslt.pxi ============================================================================== --- lxml/trunk/src/lxml/xslt.pxi (original) +++ lxml/trunk/src/lxml/xslt.pxi Tue Nov 24 18:58:52 2009 @@ -689,7 +689,7 @@ return '' # we must not use 'funicode' here as this is not always UTF-8 try: - 
result = python.PyString_FromStringAndSize(s, l) + result = python.PyBytes_FromStringAndSize(s, l) finally: tree.xmlFree(s) return result From scoder at codespeak.net Tue Nov 24 18:58:57 2009 From: scoder at codespeak.net (scoder at codespeak.net) Date: Tue, 24 Nov 2009 18:58:57 +0100 (CET) Subject: [Lxml-checkins] r69599 - in lxml/trunk: . src/lxml Message-ID: <20091124175857.DD8AF168008@codespeak.net> Author: scoder Date: Tue Nov 24 18:58:57 2009 New Revision: 69599 Modified: lxml/trunk/ (props changed) lxml/trunk/src/lxml/apihelpers.pxi Log: r5317 at delle: sbehnel | 2009-11-11 20:44:21 +0100 type fix Modified: lxml/trunk/src/lxml/apihelpers.pxi ============================================================================== --- lxml/trunk/src/lxml/apihelpers.pxi (original) +++ lxml/trunk/src/lxml/apihelpers.pxi Tue Nov 24 18:58:57 2009 @@ -675,7 +675,7 @@ tree.xmlAddNextSibling(c_node, c_text_node) return 0 -cdef _resolveQNameText(_Element element, value): +cdef bytes _resolveQNameText(_Element element, value): cdef xmlNs* c_ns ns, tag = _getNsTag(value) if ns is None: From scoder at codespeak.net Tue Nov 24 18:59:11 2009 From: scoder at codespeak.net (scoder at codespeak.net) Date: Tue, 24 Nov 2009 18:59:11 +0100 (CET) Subject: [Lxml-checkins] r69600 - in lxml/trunk: . 
src/lxml Message-ID: <20091124175911.2773D168006@codespeak.net> Author: scoder Date: Tue Nov 24 18:59:10 2009 New Revision: 69600 Modified: lxml/trunk/ (props changed) lxml/trunk/src/lxml/lxml.etree.pyx Log: r5327 at delle: sbehnel | 2009-11-12 09:13:43 +0100 more PyString_ -> PyBytes_ changes Modified: lxml/trunk/src/lxml/lxml.etree.pyx ============================================================================== --- lxml/trunk/src/lxml/lxml.etree.pyx (original) +++ lxml/trunk/src/lxml/lxml.etree.pyx Tue Nov 24 18:59:10 2009 @@ -332,7 +332,7 @@ ns = python.PyTuple_GET_ITEM(_PREFIX_CACHE, self._ns_counter) python.Py_INCREF(ns) else: - ns = python.PyString_FromFormat("ns%d", self._ns_counter) + ns = python.PyBytes_FromFormat("ns%d", self._ns_counter) if self._prefix_tail is not None: ns += self._prefix_tail self._ns_counter += 1 @@ -391,7 +391,7 @@ cdef __initPrefixCache(): cdef int i - return tuple([ python.PyString_FromFormat("ns%d", i) + return tuple([ python.PyBytes_FromFormat("ns%d", i) for i in range(30) ]) cdef object _PREFIX_CACHE @@ -1467,11 +1467,10 @@ return funicode(self._c_node.name) def __set__(self, value): - value = _utf8(value) + value_utf = _utf8(value) assert u'&' not in value and u';' not in value, \ u"Invalid entity name '%s'" % value - c_text = _cstr(value) - tree.xmlNodeSetName(self._c_node, c_text) + tree.xmlNodeSetName(self._c_node, _cstr(value_utf)) property text: # FIXME: should this be None or '&[VALUE];' or the resolved From scoder at codespeak.net Tue Nov 24 18:59:14 2009 From: scoder at codespeak.net (scoder at codespeak.net) Date: Tue, 24 Nov 2009 18:59:14 +0100 (CET) Subject: [Lxml-checkins] r69601 - in lxml/trunk: . 
src/lxml src/lxml/tests Message-ID: <20091124175914.0726F168008@codespeak.net> Author: scoder Date: Tue Nov 24 18:59:14 2009 New Revision: 69601 Modified: lxml/trunk/ (props changed) lxml/trunk/src/lxml/cssselect.py lxml/trunk/src/lxml/tests/test_css.txt Log: r5328 at delle: sbehnel | 2009-11-13 14:07:16 +0100 fix ticket 481641: support treating CSS attribute selectors independent of element selectors Modified: lxml/trunk/src/lxml/cssselect.py ============================================================================== --- lxml/trunk/src/lxml/cssselect.py (original) +++ lxml/trunk/src/lxml/cssselect.py Tue Nov 24 18:59:14 2009 @@ -865,7 +865,7 @@ pos += 2 continue if c in '>+~,.*=[]()|:#': - if c in '.#' and preceding_whitespace_pos > 0: + if c in '.#[' and preceding_whitespace_pos > 0: yield Token(' ', preceding_whitespace_pos) yield Token(c, pos) pos += 1 Modified: lxml/trunk/src/lxml/tests/test_css.txt ============================================================================== --- lxml/trunk/src/lxml/tests/test_css.txt (original) +++ lxml/trunk/src/lxml/tests/test_css.txt Tue Nov 24 18:59:14 2009 @@ -26,6 +26,8 @@ Pseudo[Element[td]:first] >>> parse('a[name]') Attrib[Element[a][name]] + >>> parse('a [name]') + CombinedSelector[Element[a] Attrib[Element[*][name]]] >>> repr(parse('a[rel="include"]')).replace("u'", "'") "Attrib[Element[a][rel = String('include', 6)]]" >>> repr(parse('a[hreflang |= \'en\']')).replace("u'", "'") From scoder at codespeak.net Tue Nov 24 18:59:18 2009 From: scoder at codespeak.net (scoder at codespeak.net) Date: Tue, 24 Nov 2009 18:59:18 +0100 (CET) Subject: [Lxml-checkins] r69602 - in lxml/trunk: . 
doc Message-ID: <20091124175918.DC51816800B@codespeak.net> Author: scoder Date: Tue Nov 24 18:59:18 2009 New Revision: 69602 Modified: lxml/trunk/ (props changed) lxml/trunk/CHANGES.txt lxml/trunk/INSTALL.txt lxml/trunk/doc/main.txt Log: r5329 at delle: sbehnel | 2009-11-13 14:59:55 +0100 2.2 branch doc merge Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Tue Nov 24 18:59:18 2009 @@ -8,6 +8,9 @@ Features added -------------- +* Support for XSLT context nodes other than elements: document root, + comments, processing instructions. + * Support for strings (in addition to Elements) in node-sets returned by extension functions. @@ -52,6 +55,15 @@ * Static builds include libiconv, in addition to libxml2 and libxslt. +2.2.4 (2009-11-11) +================== + +Bugs fixed +---------- + +* Static build of libxml2/libxslt was broken. + + 2.2.3 (2009-10-30) ================== Modified: lxml/trunk/INSTALL.txt ============================================================================== --- lxml/trunk/INSTALL.txt (original) +++ lxml/trunk/INSTALL.txt Tue Nov 24 18:59:18 2009 @@ -5,6 +5,7 @@ MacOS-X, see the specific sections below. .. contents:: +.. 1 Requirements 2 Installation 3 Installation in ActivePython Modified: lxml/trunk/doc/main.txt ============================================================================== --- lxml/trunk/doc/main.txt (original) +++ lxml/trunk/doc/main.txt Tue Nov 24 18:59:18 2009 @@ -147,8 +147,8 @@ source release. If you can't wait, consider trying a less recent release version first. -The latest version is `lxml 2.2.2`_, released 2009-06-21 -(`changes for 2.2.2`_). `Older versions`_ are listed below. +The latest version is `lxml 2.2.4`_, released 2009-11-11 +(`changes for 2.2.4`_). `Older versions`_ are listed below. Please take a look at the `installation instructions`_! 
@@ -221,7 +221,7 @@ `_ and the `current in-development version `_. -.. _`PDF documentation`: lxmldoc-2.2.3.pdf +.. _`PDF documentation`: lxmldoc-2.2.4.pdf * `lxml 2.2.3`_, released 2009-10-30 (`changes for 2.2.3`_) @@ -327,6 +327,7 @@ * `lxml 0.5`_, released 2005-04-08 +.. _`lxml 2.2.4`: lxml-2.2.4.tgz .. _`lxml 2.2.3`: lxml-2.2.3.tgz .. _`lxml 2.2.2`: lxml-2.2.2.tgz .. _`lxml 2.2.1`: lxml-2.2.1.tgz @@ -379,7 +380,8 @@ .. _`lxml 0.5.1`: lxml-0.5.1.tgz .. _`lxml 0.5`: lxml-0.5.tgz -.. _`changes for 2.2.3`: changes-2.2.2.html +.. _`changes for 2.2.4`: changes-2.2.4.html +.. _`changes for 2.2.3`: changes-2.2.3.html .. _`changes for 2.2.2`: changes-2.2.2.html .. _`changes for 2.2.1`: changes-2.2.1.html .. _`changes for 2.2`: changes-2.2.html From scoder at codespeak.net Tue Nov 24 18:59:22 2009 From: scoder at codespeak.net (scoder at codespeak.net) Date: Tue, 24 Nov 2009 18:59:22 +0100 (CET) Subject: [Lxml-checkins] r69603 - lxml/trunk Message-ID: <20091124175922.13AFB16800B@codespeak.net> Author: scoder Date: Tue Nov 24 18:59:21 2009 New Revision: 69603 Modified: lxml/trunk/ (props changed) lxml/trunk/CHANGES.txt Log: r5330 at delle: sbehnel | 2009-11-13 17:30:53 +0100 changelog Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Tue Nov 24 18:59:21 2009 @@ -28,6 +28,10 @@ Bugs fixed ---------- +* ``lxml.cssselect`` did not distinguish between ``x[attr="val"]`` and + ``x [attr="val"]`` (with a space). The latter now matches the + attribute independent of the element. + * Rewriting multiple links inside of HTML text content could end up replacing unrelated content as replacements could impact the reported position of subsequent matches. 
Modifications are now From scoder at codespeak.net Tue Nov 24 18:59:25 2009 From: scoder at codespeak.net (scoder at codespeak.net) Date: Tue, 24 Nov 2009 18:59:25 +0100 (CET) Subject: [Lxml-checkins] r69604 - in lxml/trunk: . src/lxml src/lxml/tests Message-ID: <20091124175925.DD032168008@codespeak.net> Author: scoder Date: Tue Nov 24 18:59:25 2009 New Revision: 69604 Modified: lxml/trunk/ (props changed) lxml/trunk/CHANGES.txt lxml/trunk/src/lxml/python.pxd lxml/trunk/src/lxml/readonlytree.pxi lxml/trunk/src/lxml/tests/test_xslt.py lxml/trunk/src/lxml/xsltext.pxi Log: r5331 at delle: sbehnel | 2009-11-15 11:44:03 +0100 support for several different node types as XSLT extension element context nodes Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Tue Nov 24 18:59:25 2009 @@ -8,8 +8,8 @@ Features added -------------- -* Support for XSLT context nodes other than elements: document root, - comments, processing instructions. +* XSLT extension elements: support for XSLT context nodes other than + elements: document root, comments, processing instructions. * Support for strings (in addition to Elements) in node-sets returned by extension functions. Modified: lxml/trunk/src/lxml/python.pxd ============================================================================== --- lxml/trunk/src/lxml/python.pxd (original) +++ lxml/trunk/src/lxml/python.pxd Tue Nov 24 18:59:25 2009 @@ -26,23 +26,22 @@ cdef bint PyBytes_Check(object obj) cdef bint PyBytes_CheckExact(object obj) - cdef object PyUnicode_FromEncodedObject(object s, char* encoding, + cdef unicode PyUnicode_FromEncodedObject(object s, char* encoding, char* errors) - cdef object PyUnicode_AsEncodedString(object u, char* encoding, + cdef bytes PyUnicode_AsEncodedString(object u, char* encoding, char* errors) - cdef object PyUnicode_FromFormat(char* format, ...) 
# Python 3 - cdef object PyUnicode_Decode(char* s, Py_ssize_t size, - char* encoding, char* errors) - cdef object PyUnicode_DecodeUTF8(char* s, Py_ssize_t size, char* errors) - cdef object PyUnicode_DecodeLatin1(char* s, Py_ssize_t size, char* errors) - cdef object PyUnicode_AsUTF8String(object ustring) - cdef object PyUnicode_AsASCIIString(object ustring) + cdef unicode PyUnicode_FromFormat(char* format, ...) # Python 3 + cdef unicode PyUnicode_Decode(char* s, Py_ssize_t size, + char* encoding, char* errors) + cdef unicode PyUnicode_DecodeUTF8(char* s, Py_ssize_t size, char* errors) + cdef unicode PyUnicode_DecodeLatin1(char* s, Py_ssize_t size, char* errors) + cdef bytes PyUnicode_AsUTF8String(object ustring) + cdef bytes PyUnicode_AsASCIIString(object ustring) cdef char* PyUnicode_AS_DATA(object ustring) cdef Py_ssize_t PyUnicode_GET_DATA_SIZE(object ustring) cdef Py_ssize_t PyUnicode_GET_SIZE(object ustring) - cdef object PyBytes_FromStringAndSize(char* s, Py_ssize_t size) - cdef object PyBytes_FromFormat(char* format, ...) - cdef object PyString_FromFormat(char* format, ...) # to be deleted! + cdef bytes PyBytes_FromStringAndSize(char* s, Py_ssize_t size) + cdef bytes PyBytes_FromFormat(char* format, ...) cdef Py_ssize_t PyBytes_GET_SIZE(object s) cdef object PyNumber_Int(object value) Modified: lxml/trunk/src/lxml/readonlytree.pxi ============================================================================== --- lxml/trunk/src/lxml/readonlytree.pxi (original) +++ lxml/trunk/src/lxml/readonlytree.pxi Tue Nov 24 18:59:25 2009 @@ -1,10 +1,10 @@ # read-only tree implementation -cdef class _ReadOnlyElementProxy: - u"The main read-only Element proxy class (for internal use only!)." +cdef class _ReadOnlyProxy: + u"A read-only proxy class suitable for PIs/Comments (for internal use only!)." 
cdef bint _free_after_use cdef xmlNode* _c_node - cdef object _source_proxy + cdef _ReadOnlyProxy _source_proxy cdef list _dependent_proxies cdef int _assertNode(self) except -1: @@ -13,6 +13,9 @@ assert self._c_node is not NULL, u"Proxy invalidated!" return 0 + cdef int _raise_unsupported_type(self): + raise TypeError("Unsupported node type: %d" % self._c_node.type) + cdef void free_after_use(self): u"""Should the xmlNode* be freed when releasing the proxy? """ @@ -23,7 +26,16 @@ """ def __get__(self): self._assertNode() - return _namespacedName(self._c_node) + if self._c_node.type == tree.XML_ELEMENT_NODE: + return _namespacedName(self._c_node) + elif self._c_node.type == tree.XML_PI_NODE: + return ProcessingInstruction + elif self._c_node.type == tree.XML_COMMENT_NODE: + return Comment + elif self._c_node.type == tree.XML_ENTITY_REF_NODE: + return Entity + else: + self._raise_unsupported_type() property text: u"""Text before the first subelement. This is either a string or @@ -31,7 +43,18 @@ """ def __get__(self): self._assertNode() - return _collectText(self._c_node.children) + if self._c_node.type == tree.XML_ELEMENT_NODE: + return _collectText(self._c_node.children) + elif self._c_node.type in (tree.XML_PI_NODE, + tree.XML_COMMENT_NODE): + if self._c_node.content is NULL: + return '' + else: + return funicode(self._c_node.content) + elif self._c_node.type == tree.XML_ENTITY_REF_NODE: + return u'&%s;' % funicode(self._c_node.name) + else: + self._raise_unsupported_type() property tail: u"""Text after this element's end tag, but before the next sibling @@ -42,21 +65,6 @@ self._assertNode() return _collectText(self._c_node.next) - property attrib: - def __get__(self): - self._assertNode() - return dict(_collectAttributes(self._c_node, 3)) - - property prefix: - u"""Namespace prefix or None. 
- """ - def __get__(self): - self._assertNode() - if self._c_node.ns is not NULL: - if self._c_node.ns.prefix is not NULL: - return funicode(self._c_node.ns.prefix) - return None - property sourceline: u"""Original line number as found by the parser or None if unknown. """ @@ -70,7 +78,21 @@ return None def __repr__(self): - return u"<Element %s at %x>" % (self.tag, id(self)) + self._assertNode() + if self._c_node.type == tree.XML_ELEMENT_NODE: + return u"<Element %s at %x>" % (self.tag, id(self)) + elif self._c_node.type == tree.XML_COMMENT_NODE: + return u"<!--%s-->" % self.text + elif self._c_node.type == tree.XML_ENTITY_NODE: + return u"&%s;" % funicode(self._c_node.name) + elif self._c_node.type == tree.XML_PI_NODE: + text = self.text + if text: + return u"<?%s %s?>" % (self.target, text) + else: + return u"<?%s?>" % self.target + else: + self._raise_unsupported_type() def __getitem__(self, x): u"""Returns the subelement at the given position or the requested @@ -81,6 +103,7 @@ cdef Py_ssize_t c, i cdef _node_to_node_function next_element cdef list result + self._assertNode() if python.PySlice_Check(x): # slicing if _isFullSlice(x): @@ -133,11 +156,13 @@ u"__deepcopy__(self, memo)" return self.__copy__() - def __copy__(self): + cpdef __copy__(self): u"__copy__(self)" cdef xmlDoc* c_doc cdef xmlNode* c_node cdef _Document new_doc + if self._c_node is NULL: + return self c_doc = _copyDocRoot(self._c_node.doc, self._c_node) # recursive new_doc = _documentFactory(c_doc, None) root = new_doc.getroot() @@ -166,33 +191,6 @@ children = children[::-1] return iter(children) - def get(self, key, default=None): - u"""Gets an element attribute. - """ - self._assertNode() - return _getNodeAttributeValue(self._c_node, key, default) - - def keys(self): - u"""Gets a list of attribute names. The names are returned in an - arbitrary order (just like for an ordinary Python dictionary). - """ - self._assertNode() - return _collectAttributes(self._c_node, 1) - - def values(self): - u"""Gets element attributes, as a sequence. 
The attributes are returned - in an arbitrary order. - """ - self._assertNode() - return _collectAttributes(self._c_node, 2) - - def items(self): - u"""Gets element attributes, as a sequence. The attributes are returned - in an arbitrary order. - """ - self._assertNode() - return _collectAttributes(self._c_node, 3) - cpdef getchildren(self): u"""Returns all subelements. The elements are returned in document order. @@ -240,20 +238,98 @@ return None +cdef class _ReadOnlyPIProxy(_ReadOnlyProxy): + u"A read-only proxy for processing instructions (for internal use only!)" + property target: + def __get__(self): + self._assertNode() + return funicode(self._c_node.name) + +cdef class _ReadOnlyEntityProxy(_ReadOnlyProxy): + u"A read-only proxy for entity references (for internal use only!)" + property name: + def __get__(self): + return funicode(self._c_node.name) + + def __set__(self, value): + value_utf = _utf8(value) + assert u'&' not in value and u';' not in value, \ + u"Invalid entity name '%s'" % value + tree.xmlNodeSetName(self._c_node, _cstr(value_utf)) + + property text: + def __get__(self): + return u'&%s;' % funicode(self._c_node.name) + + +cdef class _ReadOnlyElementProxy(_ReadOnlyProxy): + u"The main read-only Element proxy class (for internal use only!)." + + property attrib: + def __get__(self): + self._assertNode() + return dict(_collectAttributes(self._c_node, 3)) + + property prefix: + u"""Namespace prefix or None. + """ + def __get__(self): + self._assertNode() + if self._c_node.ns is not NULL: + if self._c_node.ns.prefix is not NULL: + return funicode(self._c_node.ns.prefix) + return None + + def get(self, key, default=None): + u"""Gets an element attribute. + """ + self._assertNode() + return _getNodeAttributeValue(self._c_node, key, default) + + def keys(self): + u"""Gets a list of attribute names. The names are returned in an + arbitrary order (just like for an ordinary Python dictionary). 
+ """ + self._assertNode() + return _collectAttributes(self._c_node, 1) + + def values(self): + u"""Gets element attributes, as a sequence. The attributes are returned + in an arbitrary order. + """ + self._assertNode() + return _collectAttributes(self._c_node, 2) + + def items(self): + u"""Gets element attributes, as a sequence. The attributes are returned + in an arbitrary order. + """ + self._assertNode() + return _collectAttributes(self._c_node, 3) + + cdef extern from "etree_defs.h": # macro call to 't->tp_new()' for fast instantiation - cdef _ReadOnlyElementProxy NEW_RO_PROXY "PY_NEW" (object t) + cdef _ReadOnlyProxy NEW_RO_PROXY "PY_NEW" (object t) -cdef _ReadOnlyElementProxy _newReadOnlyProxy( - _ReadOnlyElementProxy source_proxy, xmlNode* c_node): - cdef _ReadOnlyElementProxy el - el = NEW_RO_PROXY(_ReadOnlyElementProxy) +cdef _ReadOnlyProxy _newReadOnlyProxy( + _ReadOnlyProxy source_proxy, xmlNode* c_node): + cdef _ReadOnlyProxy el + if c_node.type == tree.XML_ELEMENT_NODE: + el = NEW_RO_PROXY(_ReadOnlyElementProxy) + elif c_node.type == tree.XML_PI_NODE: + el = NEW_RO_PROXY(_ReadOnlyPIProxy) + elif c_node.type in (tree.XML_COMMENT_NODE, + tree.XML_ENTITY_REF_NODE): + el = NEW_RO_PROXY(_ReadOnlyProxy) + else: + raise TypeError("Unsupported element type: %d" % c_node.type) el._c_node = c_node _initReadOnlyProxy(el, source_proxy) return el -cdef inline _initReadOnlyProxy(_ReadOnlyElementProxy el, - _ReadOnlyElementProxy source_proxy): +cdef inline _initReadOnlyProxy(_ReadOnlyProxy el, + _ReadOnlyProxy source_proxy): el._free_after_use = 0 if source_proxy is None: el._source_proxy = el @@ -262,9 +338,9 @@ el._source_proxy = source_proxy source_proxy._dependent_proxies.append(el) -cdef _freeReadOnlyProxies(_ReadOnlyElementProxy sourceProxy): +cdef _freeReadOnlyProxies(_ReadOnlyProxy sourceProxy): cdef xmlNode* c_node - cdef _ReadOnlyElementProxy el + cdef _ReadOnlyProxy el if sourceProxy is None: return if sourceProxy._dependent_proxies is None: @@ -276,6 
+352,58 @@ tree.xmlFreeNode(c_node) del sourceProxy._dependent_proxies[:] + +cdef class _ModifyContentOnlyProxy(_ReadOnlyProxy): + u"""A read-only proxy that allows changing the text content. + """ + property text: + def __get__(self): + self._assertNode() + if self._c_node.content is NULL: + return '' + else: + return funicode(self._c_node.content) + + def __set__(self, value): + cdef tree.xmlDict* c_dict + cdef char* c_text + self._assertNode() + if value is None: + c_text = NULL + else: + value = _utf8(value) + c_text = _cstr(value) + tree.xmlNodeSetContent(self._c_node, c_text) + +cdef class _ModifyContentOnlyPIProxy(_ModifyContentOnlyProxy): + u"""A read-only proxy that allows changing the text/target content of a + processing instruction. + """ + property target: + def __get__(self): + self._assertNode() + return funicode(self._c_node.name) + + def __set__(self, value): + self._assertNode() + value = _utf8(value) + c_text = _cstr(value) + tree.xmlNodeSetName(self._c_node, c_text) + +cdef class _ModifyContentOnlyEntityProxy(_ModifyContentOnlyProxy): + u"A read-only proxy for entity references (for internal use only!)" + property name: + def __get__(self): + return funicode(self._c_node.name) + + def __set__(self, value): + value = _utf8(value) + assert u'&' not in value and u';' not in value, \ + u"Invalid entity name '%s'" % value + c_text = _cstr(value) + tree.xmlNodeSetName(self._c_node, c_text) + + cdef class _AppendOnlyElementProxy(_ReadOnlyElementProxy): u"""A read-only element that allows adding children and changing the text content (i.e. everything that adds to the subtree). 
@@ -315,10 +443,16 @@ _resolveQNameText(self, value), 'UTF-8', 'strict') _setNodeText(self._c_node, value) -cdef _AppendOnlyElementProxy _newAppendOnlyProxy( - _ReadOnlyElementProxy source_proxy, xmlNode* c_node): - cdef _AppendOnlyElementProxy el - el = <_AppendOnlyElementProxy>NEW_RO_PROXY(_AppendOnlyElementProxy) + +cdef _ReadOnlyProxy _newAppendOnlyProxy( + _ReadOnlyProxy source_proxy, xmlNode* c_node): + cdef _ReadOnlyProxy el + if c_node.type == tree.XML_ELEMENT_NODE: + el = NEW_RO_PROXY(_AppendOnlyElementProxy) + elif c_node.type == tree.XML_PI_NODE: + el = NEW_RO_PROXY(_ModifyContentOnlyPIProxy) + elif c_node.type == tree.XML_COMMENT_NODE: + el = NEW_RO_PROXY(_ModifyContentOnlyProxy) el._c_node = c_node _initReadOnlyProxy(el, source_proxy) return el @@ -327,8 +461,8 @@ cdef xmlNode* c_node if isinstance(element, _Element): c_node = (<_Element>element)._c_node - elif isinstance(element, _ReadOnlyElementProxy): - c_node = (<_ReadOnlyElementProxy>element)._c_node + elif isinstance(element, _ReadOnlyProxy): + c_node = (<_ReadOnlyProxy>element)._c_node else: raise TypeError, u"invalid value to append()" Modified: lxml/trunk/src/lxml/tests/test_xslt.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_xslt.py (original) +++ lxml/trunk/src/lxml/tests/test_xslt.py Tue Nov 24 18:59:25 2009 @@ -628,175 +628,6 @@ self.assertEquals(self._rootstring(result), _bytes('C')) - def test_extensions1(self): - tree = self.parse('B') - style = self.parse('''\ - - -''') - - def mytext(ctxt, values): - return 'X' * len(values) - - result = tree.xslt(style, {('testns', 'mytext') : mytext}) - self.assertEquals(self._rootstring(result), - _bytes('X')) - - def test_extensions2(self): - tree = self.parse('B') - style = self.parse('''\ - - -''') - - def mytext(ctxt, values): - return 'X' * len(values) - - namespace = etree.FunctionNamespace('testns') - namespace['mytext'] = mytext - - result = tree.xslt(style) - 
self.assertEquals(self._rootstring(result), - _bytes('X')) - - def test_variable_result_tree_fragment(self): - tree = self.parse('B') - style = self.parse('''\ - - - - - - - - BBB -''') - - def mytext(ctxt, values): - for value in values: - self.assert_(hasattr(value, 'tag'), - "%s is not an Element" % type(value)) - self.assertEquals(value.tag, 'b') - self.assertEquals(value.text, 'BBB') - return 'X'.join([el.tag for el in values]) - - namespace = etree.FunctionNamespace('testns') - namespace['mytext'] = mytext - - result = tree.xslt(style) - self.assertEquals(self._rootstring(result), - _bytes('bXb')) - - def test_extension_element(self): - tree = self.parse('B') - style = self.parse('''\ - - - b - -''') - - class MyExt(etree.XSLTExtension): - def execute(self, context, self_node, input_node, output_parent): - child = etree.Element(self_node.text) - child.text = 'X' - output_parent.append(child) - - extensions = { ('testns', 'myext') : MyExt() } - - result = tree.xslt(style, extensions=extensions) - self.assertEquals(self._rootstring(result), - _bytes('X')) - - def test_extension_element_content(self): - tree = self.parse('B') - style = self.parse('''\ - - - XY - -''') - - class MyExt(etree.XSLTExtension): - def execute(self, context, self_node, input_node, output_parent): - output_parent.extend(list(self_node)[1:]) - - extensions = { ('testns', 'myext') : MyExt() } - - result = tree.xslt(style, extensions=extensions) - self.assertEquals(self._rootstring(result), - _bytes('Y')) - - def test_extension_element_apply_templates(self): - tree = self.parse('B') - style = self.parse('''\ - - - XY - - - XYZ -''') - - class MyExt(etree.XSLTExtension): - def execute(self, context, self_node, input_node, output_parent): - for child in self_node: - for result in self.apply_templates(context, child): - if isinstance(result, basestring): - el = etree.Element("T") - el.text = result - else: - el = result - output_parent.append(el) - - extensions = { ('testns', 'myext') : 
MyExt() } - - result = tree.xslt(style, extensions=extensions) - self.assertEquals(self._rootstring(result), - _bytes('YXYZ')) - - def test_extension_element_raise(self): - tree = self.parse('B') - style = self.parse('''\ - - - b - -''') - - class MyError(Exception): - pass - - class MyExt(etree.XSLTExtension): - def execute(self, context, self_node, input_node, output_parent): - raise MyError("expected!") - - extensions = { ('testns', 'myext') : MyExt() } - self.assertRaises(MyError, tree.xslt, style, extensions=extensions) - def test_xslt_document_XML(self): # make sure document('') works from parsed strings xslt = etree.XSLT(etree.XML("""\ @@ -1389,6 +1220,265 @@ self.assertEquals(root[3].text, "test") +class ETreeXSLTExtFuncTestCase(HelperTestCase): + """Tests for XPath extension functions in XSLT.""" + + def test_extensions1(self): + tree = self.parse('B') + style = self.parse('''\ + + +''') + + def mytext(ctxt, values): + return 'X' * len(values) + + result = tree.xslt(style, {('testns', 'mytext') : mytext}) + self.assertEquals(self._rootstring(result), + _bytes('X')) + + def test_extensions2(self): + tree = self.parse('B') + style = self.parse('''\ + + +''') + + def mytext(ctxt, values): + return 'X' * len(values) + + namespace = etree.FunctionNamespace('testns') + namespace['mytext'] = mytext + + result = tree.xslt(style) + self.assertEquals(self._rootstring(result), + _bytes('X')) + + def test_variable_result_tree_fragment(self): + tree = self.parse('B') + style = self.parse('''\ + + + + + + + + BBB +''') + + def mytext(ctxt, values): + for value in values: + self.assert_(hasattr(value, 'tag'), + "%s is not an Element" % type(value)) + self.assertEquals(value.tag, 'b') + self.assertEquals(value.text, 'BBB') + return 'X'.join([el.tag for el in values]) + + namespace = etree.FunctionNamespace('testns') + namespace['mytext'] = mytext + + result = tree.xslt(style) + self.assertEquals(self._rootstring(result), + _bytes('bXb')) + + +class 
ETreeXSLTExtElementTestCase(HelperTestCase): + """Tests for extension elements in XSLT.""" + + def test_extension_element(self): + tree = self.parse('B') + style = self.parse('''\ + + + b + +''') + + class MyExt(etree.XSLTExtension): + def execute(self, context, self_node, input_node, output_parent): + child = etree.Element(self_node.text) + child.text = 'X' + output_parent.append(child) + + extensions = { ('testns', 'myext') : MyExt() } + + result = tree.xslt(style, extensions=extensions) + self.assertEquals(self._rootstring(result), + _bytes('X')) + + def test_extension_element_doc_context(self): + tree = self.parse('B') + style = self.parse('''\ + + + b + +''') + + tags = [] + + class MyExt(etree.XSLTExtension): + def execute(self, context, self_node, input_node, output_parent): + tags.append(input_node.tag) + + extensions = { ('testns', 'myext') : MyExt() } + + result = tree.xslt(style, extensions=extensions) + self.assertEquals(tags, ['a']) + + def test_extension_element_comment_pi_context(self): + tree = self.parse('') + style = self.parse('''\ + + + + + + b + + + b + +''') + + text = [] + + class MyExt(etree.XSLTExtension): + def execute(self, context, self_node, input_node, output_parent): + text.append(input_node.text) + + extensions = { ('testns', 'myext') : MyExt() } + + result = tree.xslt(style, extensions=extensions) + self.assertEquals(text, ['toast', 'a comment', 'pi']) + + def _test_extension_element_attribute_context(self): + # currently not supported + tree = self.parse('') + style = self.parse('''\ + + + b + + + b + +''') + + text = [] + + class MyExt(etree.XSLTExtension): + def execute(self, context, self_node, attr_value, output_parent): + text.append(attr_value) + + extensions = { ('testns', 'myext') : MyExt() } + + result = tree.xslt(style, extensions=extensions) + self.assertEquals(text, ['A', 'B']) + + def test_extension_element_content(self): + tree = self.parse('B') + style = self.parse('''\ + + + XY + +''') + + class 
MyExt(etree.XSLTExtension): + def execute(self, context, self_node, input_node, output_parent): + output_parent.extend(list(self_node)[1:]) + + extensions = { ('testns', 'myext') : MyExt() } + + result = tree.xslt(style, extensions=extensions) + self.assertEquals(self._rootstring(result), + _bytes('Y')) + + def test_extension_element_apply_templates(self): + tree = self.parse('B') + style = self.parse('''\ + + + XY + + + XYZ +''') + + class MyExt(etree.XSLTExtension): + def execute(self, context, self_node, input_node, output_parent): + for child in self_node: + for result in self.apply_templates(context, child): + if isinstance(result, basestring): + el = etree.Element("T") + el.text = result + else: + el = result + output_parent.append(el) + + extensions = { ('testns', 'myext') : MyExt() } + + result = tree.xslt(style, extensions=extensions) + self.assertEquals(self._rootstring(result), + _bytes('YXYZ')) + + def test_extension_element_raise(self): + tree = self.parse('B') + style = self.parse('''\ + + + b + +''') + + class MyError(Exception): + pass + + class MyExt(etree.XSLTExtension): + def execute(self, context, self_node, input_node, output_parent): + raise MyError("expected!") + + extensions = { ('testns', 'myext') : MyExt() } + self.assertRaises(MyError, tree.xslt, style, extensions=extensions) + + class Py3XSLTTestCase(HelperTestCase): """XSLT tests for etree under Python 3""" def test_xslt_result_bytes(self): @@ -1452,6 +1542,8 @@ def test_suite(): suite = unittest.TestSuite() suite.addTests([unittest.makeSuite(ETreeXSLTTestCase)]) + suite.addTests([unittest.makeSuite(ETreeXSLTExtFuncTestCase)]) + suite.addTests([unittest.makeSuite(ETreeXSLTExtElementTestCase)]) if is_python3: suite.addTests([unittest.makeSuite(Py3XSLTTestCase)]) suite.addTests( Modified: lxml/trunk/src/lxml/xsltext.pxi ============================================================================== --- lxml/trunk/src/lxml/xsltext.pxi (original) +++ lxml/trunk/src/lxml/xsltext.pxi Tue Nov 
24 18:59:25 2009 @@ -28,7 +28,7 @@ cdef xmlNode* c_node cdef xmlNode* c_next cdef xmlNode* c_context_node - cdef _ReadOnlyElementProxy proxy + cdef _ReadOnlyProxy proxy cdef list results c_context_node = _roNodeOf(node) #assert c_context_node.doc is context._xsltContext.node.doc, \ @@ -81,7 +81,8 @@ cdef XSLTExtension extension cdef python.PyObject* dict_result cdef char* c_uri - cdef _ReadOnlyElementProxy context_node, self_node, output_parent + cdef xmlNode* c_node + cdef _ReadOnlyProxy context_node = None, self_node = None, output_parent c_uri = _getNs(c_inst_node) if c_uri is NULL: # not allowed, and should never happen @@ -91,34 +92,60 @@ return context = <_XSLTContext>c_ctxt.xpathCtxt.userData try: - dict_result = python.PyDict_GetItem( - context._extension_elements, (c_uri, c_inst_node.name)) - if dict_result is NULL: - raise KeyError, \ - u"extension element %s not found" % funicode(c_inst_node.name) - extension = dict_result - try: - self_node = _newReadOnlyProxy(None, c_inst_node) - context_node = _newReadOnlyProxy(self_node, c_context_node) - output_parent = _newAppendOnlyProxy(self_node, c_ctxt.insert) + dict_result = python.PyDict_GetItem( + context._extension_elements, (c_uri, c_inst_node.name)) + if dict_result is NULL: + raise KeyError, \ + u"extension element %s not found" % funicode(c_inst_node.name) + extension = dict_result + + try: + # build the context proxy nodes + self_node = _newReadOnlyProxy(None, c_inst_node) + output_parent = _newAppendOnlyProxy(self_node, c_ctxt.insert) + if c_context_node.type in (tree.XML_DOCUMENT_NODE, + tree.XML_HTML_DOCUMENT_NODE): + c_node = tree.xmlDocGetRootElement(c_context_node) + if c_node is not NULL: + context_node = _newReadOnlyProxy(self_node, c_node) + else: + context_node = None + elif c_context_node.type in (tree.XML_ATTRIBUTE_NODE, + tree.XML_TEXT_NODE, + tree.XML_CDATA_SECTION_NODE): + # this isn't easy to support using read-only + # nodes, as the smart-string factory must + # instantiate the parent 
proxy somehow... + raise TypeError("Unsupported element type: %d" % c_context_node.type) + else: + context_node = _newReadOnlyProxy(self_node, c_context_node) - context._extension_element_proxy = self_node - extension.execute(context, self_node, context_node, output_parent) - finally: - context._extension_element_proxy = None - if self_node is not None: - _freeReadOnlyProxies(self_node) - except Exception, e: - e = unicode(e).encode(u"UTF-8") - message = python.PyString_FromFormat( - "Error executing extension element '%s': %s", - c_inst_node.name, _cstr(e)) - xslt.xsltTransformError(c_ctxt, NULL, c_inst_node, message) - context._exc._store_raised() + # run the XSLT extension + context._extension_element_proxy = self_node + extension.execute(context, self_node, context_node, output_parent) + finally: + context._extension_element_proxy = None + if self_node is not None: + _freeReadOnlyProxies(self_node) + except Exception, e: + try: + e = unicode(e).encode(u"UTF-8") + except: + e = repr(e).encode(u"UTF-8") + message = python.PyBytes_FromFormat( + "Error executing extension element '%s': %s", + c_inst_node.name, _cstr(e)) + xslt.xsltTransformError(c_ctxt, NULL, c_inst_node, message) + context._exc._store_raised() + except: + # just in case + message = python.PyBytes_FromFormat( + "Error executing extension element '%s'", c_inst_node.name) + xslt.xsltTransformError(c_ctxt, NULL, c_inst_node, message) + context._exc._store_raised() except: - # just in case - message = python.PyString_FromFormat( - "Error executing extension element '%s'", c_inst_node.name) - xslt.xsltTransformError(c_ctxt, NULL, c_inst_node, message) + # no Python functions here - everything can fail... 
+ xslt.xsltTransformError(c_ctxt, NULL, c_inst_node, + "Error during XSLT extension element evaluation") context._exc._store_raised() From scoder at codespeak.net Tue Nov 24 18:59:30 2009 From: scoder at codespeak.net (scoder at codespeak.net) Date: Tue, 24 Nov 2009 18:59:30 +0100 (CET) Subject: [Lxml-checkins] r69605 - in lxml/trunk: . doc Message-ID: <20091124175930.B2046168008@codespeak.net> Author: scoder Date: Tue Nov 24 18:59:30 2009 New Revision: 69605 Modified: lxml/trunk/ (props changed) lxml/trunk/doc/build.txt Log: r5332 at delle: sbehnel | 2009-11-17 20:13:02 +0100 mac build docs: show how to install Modified: lxml/trunk/doc/build.txt ============================================================================== --- lxml/trunk/doc/build.txt (original) +++ lxml/trunk/doc/build.txt Tue Nov 24 18:59:30 2009 @@ -209,6 +209,8 @@ --libxml2-version=2.7.3 \ --libxslt-version=1.1.24 \ + sudo python setup.py install + Instead of ``build``, you can use any target, like ``bdist_egg`` if you want to use setuptools to build an installable egg. 
From scoder at codespeak.net Tue Nov 24 18:59:34 2009 From: scoder at codespeak.net (scoder at codespeak.net) Date: Tue, 24 Nov 2009 18:59:34 +0100 (CET) Subject: [Lxml-checkins] r69606 - lxml/trunk Message-ID: <20091124175934.10563168006@codespeak.net> Author: scoder Date: Tue Nov 24 18:59:34 2009 New Revision: 69606 Modified: lxml/trunk/ (props changed) lxml/trunk/buildlibxml.py Log: r5333 at delle: sbehnel | 2009-11-24 18:55:53 +0100 build support for MacOS-X 10.6 Modified: lxml/trunk/buildlibxml.py ============================================================================== --- lxml/trunk/buildlibxml.py (original) +++ lxml/trunk/buildlibxml.py Tue Nov 24 18:59:34 2009 @@ -197,15 +197,23 @@ call_setup = {} env_setup = None if sys.platform in ('darwin',): + import platform # We compile Universal if we are on a machine > 10.3 - major_version = int(os.uname()[2].split('.')[0]) + major_version, minor_version = map(int, platform.mac_ver()[0].split('.')[:2]) if major_version > 7: env = os.environ.copy() - env.update({ - 'CFLAGS' : "-arch ppc -arch i386 -isysroot /Developer/SDKs/MacOSX10.4u.sdk -O2", - 'LDFLAGS' : "-arch ppc -arch i386 -isysroot /Developer/SDKs/MacOSX10.4u.sdk", - 'MACOSX_DEPLOYMENT_TARGET' : "10.3" - }) + if minor_version < 6: + env.update({ + 'CFLAGS' : "-arch ppc -arch i386 -isysroot /Developer/SDKs/MacOSX10.4u.sdk -O2", + 'LDFLAGS' : "-arch ppc -arch i386 -isysroot /Developer/SDKs/MacOSX10.4u.sdk", + 'MACOSX_DEPLOYMENT_TARGET' : "10.3" + }) + else: + env.update({ + 'CFLAGS' : "-arch ppc -arch i386 -arch x86_64 -O2", + 'LDFLAGS' : "-arch ppc -arch i386 -arch x86_64", + 'MACOSX_DEPLOYMENT_TARGET' : "10.6" + }) call_setup['env'] = env configure_cmd = ['./configure', From scoder at codespeak.net Tue Nov 24 18:59:38 2009 From: scoder at codespeak.net (scoder at codespeak.net) Date: Tue, 24 Nov 2009 18:59:38 +0100 (CET) Subject: [Lxml-checkins] r69607 - in lxml/trunk: . 
src/lxml src/lxml/tests Message-ID: <20091124175938.58F0F168008@codespeak.net> Author: scoder Date: Tue Nov 24 18:59:37 2009 New Revision: 69607 Modified: lxml/trunk/ (props changed) lxml/trunk/src/lxml/apihelpers.pxi lxml/trunk/src/lxml/cleanup.pxi lxml/trunk/src/lxml/tests/test_etree.py Log: r5334 at delle: sbehnel | 2009-11-24 18:58:17 +0100 major rewrite in cleanup.pxi: faster and less buggy Modified: lxml/trunk/src/lxml/apihelpers.pxi ============================================================================== --- lxml/trunk/src/lxml/apihelpers.pxi (original) +++ lxml/trunk/src/lxml/apihelpers.pxi Tue Nov 24 18:59:37 2009 @@ -400,6 +400,7 @@ return 0 cdef int _replaceNodeByChildren(_Document doc, xmlNode* c_node) except -1: + # NOTE: this does not deallocate the node, just unlink it! cdef xmlNode* c_parent cdef xmlNode* c_child if c_node.children is NULL: @@ -883,6 +884,35 @@ else: return 0 +cdef inline bint _tagMatchesExactly(xmlNode* c_node, char* c_href, char* c_name): + u"""Tests if the node matches namespace URI and tag name. + + This differs from _tagMatches() in that it does not consider a + NULL value in c_href a wildcard, and that it expects the c_name to + be taken from the doc dict, i.e. it only compares the names by + address. + + A node matches if it matches both c_href and c_name. + + A node matches c_href if any of the following is true: + * its namespace is NULL and c_href is the empty string + * its namespace string equals the c_href string + + A node matches c_name if any of the following is true: + * c_name is NULL + * its name string points to the same address (!) 
as c_name + """ + cdef char* c_node_href + if c_name is not NULL and c_name is not c_node.name: + return 0 + c_node_href = _getNs(c_node) + if c_href is NULL: + return c_node_href is NULL or c_node_href[0] == '\0' + elif c_node_href is NULL: + return 0 + else: + return cstd.strcmp(c_href, c_node_href) == 0 + cdef int _removeNode(_Document doc, xmlNode* c_node) except -1: u"""Unlink and free a node and subnodes if possible. Otherwise, make sure it's self-contained. @@ -896,6 +926,29 @@ moveNodeToDocument(doc, c_node.doc, c_node) return 0 +cdef int _removeSiblings(xmlNode* c_element, int node_type, bint with_tail) except -1: + cdef xmlNode* c_node + cdef xmlNode* c_next + c_node = c_element.next + while c_node is not NULL: + c_next = _nextElement(c_node) + if c_node.type == node_type: + if with_tail: + _removeText(c_node.next) + tree.xmlUnlinkNode(c_node) + attemptDeallocation(c_node) + c_node = c_next + c_node = c_element.prev + while c_node is not NULL: + c_next = _previousElement(c_node) + if c_node.type == node_type: + if with_tail: + _removeText(c_node.next) + tree.xmlUnlinkNode(c_node) + attemptDeallocation(c_node) + c_node = c_next + return 0 + cdef void _moveTail(xmlNode* c_tail, xmlNode* c_target): cdef xmlNode* c_next # tail support: look for any text nodes trailing this node and Modified: lxml/trunk/src/lxml/cleanup.pxi ============================================================================== --- lxml/trunk/src/lxml/cleanup.pxi (original) +++ lxml/trunk/src/lxml/cleanup.pxi Tue Nov 24 18:59:37 2009 @@ -22,40 +22,56 @@ 'simpleattr', '{http://some/ns}attrname') """ - cdef xmlNode* c_node - cdef xmlAttr* c_attr cdef _Element element cdef list ns_tags - cdef char* c_name + cdef char** c_ns_tags + cdef Py_ssize_t c_tag_count element = _rootNodeOrRaise(tree_or_element) if not attribute_names: return ns_tags = _sortedTagList([ _getNsTag(attr) for attr in attribute_names ]) - ns_tags = [ (ns, tag if tag != '*' else None) + ns_tags = [ (ns, tag if tag != b'*' 
else None) for ns, tag in ns_tags ] - c_node = element._c_node + # tag names are passes as C pointers as this allows us to take + # them from the doc dict and do pointer comparisons + c_ns_tags = cstd.malloc(sizeof(char*) * len(ns_tags) * 2 + 2) + if c_ns_tags is NULL: + python.PyErr_NoMemory() + + try: + c_tag_count = _mapTagsToCharArray(element._doc._c_doc, ns_tags, c_ns_tags) + if c_tag_count > 0: + _strip_attributes(element._c_node, c_ns_tags, c_tag_count) + finally: + cstd.free(c_ns_tags) + +cdef _strip_attributes(xmlNode* c_node, char** c_ns_tags, Py_ssize_t c_tag_count): + cdef xmlAttr* c_attr + cdef Py_ssize_t i + cdef char* c_href + cdef char* c_name + tree.BEGIN_FOR_EACH_ELEMENT_FROM(c_node, c_node, 1) if c_node.type == tree.XML_ELEMENT_NODE: if c_node.properties is not NULL: - for ns, tag in ns_tags: - # must search attributes manually to make sure we only - # match on blank tag names if there is no namespace - c_name = NULL if tag is None else _cstr(tag) + for i in xrange(c_tag_count): + c_href = c_ns_tags[2*i] + c_name = c_ns_tags[2*i+1] + # must compare attributes manually to make sure we + # only match on wildcard tag names if the attribute + # has no namespace c_attr = c_node.properties while c_attr is not NULL: - if ns is None: - if c_attr.ns is NULL or c_attr.ns.href is NULL: - if c_name is NULL or \ - cstd.strcmp(c_attr.name, c_name) == 0: + if c_name is NULL or c_attr.name == c_name: + if c_href is NULL: + if c_attr.ns is NULL or c_attr.ns.href is NULL: tree.xmlRemoveProp(c_attr) break - elif c_attr.ns is not NULL and c_attr.ns.href is not NULL: - if cstd.strcmp(c_attr.ns.href, _cstr(ns)) == 0: - if c_name is NULL or \ - cstd.strcmp(c_attr.name, c_name) == 0: + elif c_attr.ns is not NULL and c_attr.ns.href is not NULL: + if cstd.strcmp(c_attr.ns.href, c_href) == 0: tree.xmlRemoveProp(c_attr) break c_attr = c_attr.next @@ -84,14 +100,11 @@ Comment # comments ) """ - cdef xmlNode* c_node - cdef xmlNode* c_child - cdef xmlNode* c_next - cdef char* 
c_href - cdef char* c_name cdef _Element element cdef _Document doc cdef list ns_tags + cdef char** c_ns_tags + cdef Py_ssize_t c_tag_count cdef bint strip_comments, strip_pis, strip_entities doc = _documentOrRaise(tree_or_element) @@ -101,7 +114,35 @@ ns_tags = _filterSpecialTagNames( tag_names, &strip_comments, &strip_pis, &strip_entities) - c_node = element._c_node + # tag names are passes as C pointers as this allows us to take + # them from the doc dict and do pointer comparisons + c_ns_tags = cstd.malloc(sizeof(char*) * len(ns_tags) * 2 + 2) + if c_ns_tags is NULL: + python.PyErr_NoMemory() + + if (strip_comments or strip_pis) and isinstance(tree_or_element, _ElementTree): + # include PIs and comments next to the root node + if strip_comments: + _removeSiblings(element._c_node, tree.XML_COMMENT_NODE, with_tail) + if strip_pis: + _removeSiblings(element._c_node, tree.XML_PI_NODE, with_tail) + + try: + c_tag_count = _mapTagsToCharArray(doc._c_doc, ns_tags, c_ns_tags) + if c_tag_count > 0 or strip_comments or strip_pis or strip_entities: + _strip_elements(doc, element._c_node, c_ns_tags, c_tag_count, + strip_comments, strip_pis, strip_entities, with_tail) + finally: + cstd.free(c_ns_tags) + +cdef _strip_elements(_Document doc, xmlNode* c_node, + char** c_ns_tags, Py_ssize_t c_tag_count, + bint strip_comments, bint strip_pis, bint strip_entities, + bint with_tail): + cdef xmlNode* c_child + cdef xmlNode* c_next + cdef Py_ssize_t i + tree.BEGIN_FOR_EACH_ELEMENT_FROM(c_node, c_node, 1) if c_node.type == tree.XML_ELEMENT_NODE: # we run through the children here to prevent any problems @@ -109,39 +150,25 @@ # c_node itself c_child = _findChildForwards(c_node, 0) while c_child is not NULL: + c_next = _nextElement(c_child) if c_child.type == tree.XML_ELEMENT_NODE: - for ns, tag in ns_tags: - if ns is None: - # _tagMatches() considers NULL a wildcard - # match but we don't - if c_child.ns is not NULL and c_child.ns.href is not NULL: - continue - c_href = NULL - else: - 
c_href = _cstr(ns) - c_name = NULL if tag is None else _cstr(tag) - if _tagMatches(c_child, c_href, c_name): - c_next = _nextElement(c_child) + for i in xrange(c_tag_count): + if _tagMatchesExactly(c_child, c_ns_tags[2*i], c_ns_tags[2*i+1]): if not with_tail: tree.xmlUnlinkNode(c_child) _removeNode(doc, c_child) - c_child = c_next break - else: - c_child = _nextElement(c_child) - elif strip_comments and c_child.type == tree.XML_COMMENT_NODE or \ - strip_pis and c_child.type == tree.XML_PI_NODE or \ - strip_entities and c_child.type == tree.XML_ENTITY_REF_NODE: - c_next = _nextElement(c_child) + elif c_child.type == tree.XML_COMMENT_NODE and strip_comments \ + or c_child.type == tree.XML_PI_NODE and strip_pis \ + or c_child.type == tree.XML_ENTITY_REF_NODE and strip_entities: if with_tail: - _removeText(c_next) + _removeText(c_child.next) tree.xmlUnlinkNode(c_child) attemptDeallocation(c_child) - c_child = c_next - else: - c_child = _nextElement(c_child) + c_child = c_next tree.END_FOR_EACH_ELEMENT_FROM(c_node) + def strip_tags(tree_or_element, *tag_names): u"""strip_tags(tree_or_element, *tag_names) @@ -164,15 +191,12 @@ Comment # comments (including their text!) 
) """ - cdef xmlNode* c_node - cdef xmlNode* c_child - cdef xmlNode* c_next - cdef char* c_href - cdef char* c_name cdef _Element element cdef _Document doc cdef list ns_tags cdef bint strip_comments, strip_pis, strip_entities + cdef char** c_ns_tags + cdef Py_ssize_t c_tag_count doc = _documentOrRaise(tree_or_element) element = _rootNodeOrRaise(tree_or_element) @@ -181,7 +205,34 @@ ns_tags = _filterSpecialTagNames( tag_names, &strip_comments, &strip_pis, &strip_entities) - c_node = element._c_node + # tag names are passes as C pointers as this allows us to take + # them from the doc dict and do pointer comparisons + c_ns_tags = cstd.malloc(sizeof(char*) * len(ns_tags) * 2 + 2) + if c_ns_tags is NULL: + python.PyErr_NoMemory() + + if (strip_comments or strip_pis) and isinstance(tree_or_element, _ElementTree): + # include PIs and comments next to the root node + if strip_comments: + _removeSiblings(element._c_node, tree.XML_COMMENT_NODE, 0) + if strip_pis: + _removeSiblings(element._c_node, tree.XML_PI_NODE, 0) + + try: + c_tag_count = _mapTagsToCharArray(doc._c_doc, ns_tags, c_ns_tags) + if c_tag_count > 0 or strip_comments or strip_pis or strip_entities: + _strip_tags(doc, element._c_node, c_ns_tags, c_tag_count, + strip_comments, strip_pis, strip_entities) + finally: + cstd.free(c_ns_tags) + +cdef _strip_tags(_Document doc, xmlNode* c_node, + char** c_ns_tags, Py_ssize_t c_tag_count, + bint strip_comments, bint strip_pis, bint strip_entities): + cdef xmlNode* c_child + cdef xmlNode* c_next + cdef Py_ssize_t i + tree.BEGIN_FOR_EACH_ELEMENT_FROM(c_node, c_node, 1) if c_node.type == tree.XML_ELEMENT_NODE: # we run through the children here to prevent any problems @@ -190,39 +241,26 @@ c_child = _findChildForwards(c_node, 0) while c_child is not NULL: if c_child.type == tree.XML_ELEMENT_NODE: - for ns, tag in ns_tags: - if ns is None: - # _tagMatches() considers NULL a wildcard - # match but we don't - if c_child.ns is not NULL and c_child.ns.href is not NULL: - 
continue - c_href = NULL - else: - c_href = _cstr(ns) - c_name = NULL if tag is None else _cstr(tag) - if _tagMatches(c_child, c_href, c_name): - if c_child.children is not NULL: - c_next = _findChildForwards(c_child, 0) - else: - c_next = _nextElement(c_child) + for i in xrange(c_tag_count): + if _tagMatchesExactly(c_child, c_ns_tags[2*i], c_ns_tags[2*i+1]): + c_next = _findChildForwards(c_child, 0) or _nextElement(c_child) _replaceNodeByChildren(doc, c_child) if not attemptDeallocation(c_child): - if c_child.ns is not NULL: + if c_child.nsDef is not NULL: # make namespaces absolute moveNodeToDocument(doc, doc._c_doc, c_child) c_child = c_next break else: - c_child = c_child.next - elif strip_comments and c_child.type == tree.XML_COMMENT_NODE or \ - strip_pis and c_child.type == tree.XML_PI_NODE or \ - strip_entities and c_child.type == tree.XML_ENTITY_REF_NODE: + c_child = _nextElement(c_child) + else: c_next = _nextElement(c_child) - tree.xmlUnlinkNode(c_child) - attemptDeallocation(c_child) + if c_child.type == tree.XML_COMMENT_NODE and strip_comments \ + or c_child.type == tree.XML_PI_NODE and strip_pis \ + or c_child.type == tree.XML_ENTITY_REF_NODE and strip_entities: + tree.xmlUnlinkNode(c_child) + attemptDeallocation(c_child) c_child = c_next - else: - c_child = _nextElement(c_child) tree.END_FOR_EACH_ELEMENT_FROM(c_node) @@ -235,7 +273,7 @@ cdef list decorated_list cdef tuple ns_tag cdef Py_ssize_t i - decorated_list = [ (ns_tag[0] or '', ns_tag[1], i, ns_tag) + decorated_list = [ (ns_tag[0] or b'', ns_tag[1], i, ns_tag) for i, ns_tag in enumerate(l) ] decorated_list.sort() return [ item[-1] for item in decorated_list ] @@ -246,18 +284,38 @@ pis[0] = 0 entities[0] = 0 - if Comment in tag_names: - comments[0] = 1 - tag_names = [ tag for tag in tag_names - if tag is not Comment ] - if ProcessingInstruction in tag_names: - pis[0] = 1 - tag_names = [ tag for tag in tag_names - if tag is not ProcessingInstruction ] - if Entity in tag_names: - entities[0] = 1 - 
tag_names = [ tag for tag in tag_names - if tag is not Entity ] - ns_tags = _sortedTagList([ _getNsTag(tag) for tag in tag_names ]) + ns_tags = [] + for tag in tag_names: + if tag is Comment: + comments[0] = 1 + elif tag is ProcessingInstruction: + pis[0] = 1 + elif tag is Entity: + entities[0] = 1 + else: + ns_tags.append(_getNsTag(tag)) + return [ (ns, tag if tag != '*' else None) - for ns, tag in ns_tags ] + for ns, tag in _sortedTagList(ns_tags) ] + +cdef Py_ssize_t _mapTagsToCharArray(xmlDoc* c_doc, list ns_tags, + char** c_ns_tags) except -1: + cdef Py_ssize_t count = 0 + cdef char* c_tag + for ns, tag in ns_tags: + if ns is None: + c_ns_tags[0] = NULL + else: + c_ns_tags[0] = _cstr(ns) + if tag is None: + c_ns_tags[1] = NULL + else: + c_tag = _cstr(tag) + c_ns_tags[1] = tree.xmlDictExists( + c_doc.dict, c_tag, cstd.strlen(c_tag)) + if c_ns_tags[1] == NULL: + # not in the dict => not in the document + continue + c_ns_tags += 2 + count += 1 + return count Modified: lxml/trunk/src/lxml/tests/test_etree.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_etree.py (original) +++ lxml/trunk/src/lxml/tests/test_etree.py Tue Nov 24 18:59:37 2009 @@ -7,7 +7,14 @@ test_elementtree """ -import os.path, unittest, copy, sys, operator, tempfile, gzip +import os.path +import unittest +import copy +import sys +import re +import operator +import tempfile +import gzip this_dir = os.path.dirname(__file__) if this_dir not in sys.path: @@ -312,6 +319,85 @@ self.assertEquals(_bytes('TESTABCTBTATXABTCTATXT'), self._writeElement(root)) + def test_strip_tags_pi_comment(self): + XML = self.etree.XML + PI = self.etree.ProcessingInstruction + Comment = self.etree.Comment + xml = _bytes('\n\nTESTXT\n\n') + + root = XML(xml) + self.etree.strip_tags(root, PI) + self.assertEquals(_bytes('\n\nTESTXT\n\n'), + self._writeElement(root)) + + root = XML(xml) + self.etree.strip_tags(root, Comment) + 
self.assertEquals(_bytes('\n\nTESTXT\n\n'), + self._writeElement(root)) + + root = XML(xml) + self.etree.strip_tags(root, PI, Comment) + self.assertEquals(_bytes('\n\nTESTXT\n\n'), + self._writeElement(root)) + + root = XML(xml) + self.etree.strip_tags(root, Comment, PI) + self.assertEquals(_bytes('\n\nTESTXT\n\n'), + self._writeElement(root)) + + def test_strip_tags_pi_comment_all(self): + XML = self.etree.XML + ElementTree = self.etree.ElementTree + PI = self.etree.ProcessingInstruction + Comment = self.etree.Comment + xml = _bytes('\n\nTESTXT\n\n') + + root = XML(xml) + self.etree.strip_tags(ElementTree(root), PI) + self.assertEquals(_bytes('\nTESTXT\n'), + self._writeElement(root)) + + root = XML(xml) + self.etree.strip_tags(ElementTree(root), Comment) + self.assertEquals(_bytes('\nTESTXT\n'), + self._writeElement(root)) + + root = XML(xml) + self.etree.strip_tags(ElementTree(root), PI, Comment) + self.assertEquals(_bytes('TESTXT'), + self._writeElement(root)) + + root = XML(xml) + self.etree.strip_tags(ElementTree(root), Comment, PI) + self.assertEquals(_bytes('TESTXT'), + self._writeElement(root)) + + def test_strip_tags_doc_style(self): + XML = self.etree.XML + xml = _bytes(''' +
+
+ I like sheep. +
+ I like lots of sheep. +
+ Click here for those sheep. +
+
+
+ '''.strip()) + + root = XML(xml) + self.etree.strip_tags(root, 'a') + self.assertEquals(re.sub(_bytes(']*>'), '', xml).replace('
', '

'), + self._writeElement(root)) + + root = XML(xml) + self.etree.strip_tags(root, 'a', 'br') + self.assertEquals(re.sub(_bytes(']*>'), '', + re.sub(_bytes(']*>'), '', xml)), + self._writeElement(root)) + def test_strip_tags_ns(self): XML = self.etree.XML xml = _bytes('TESTABCTBTATXABTCTATXT') From scoder at codespeak.net Tue Nov 24 19:09:44 2009 From: scoder at codespeak.net (scoder at codespeak.net) Date: Tue, 24 Nov 2009 19:09:44 +0100 (CET) Subject: [Lxml-checkins] r69609 - in lxml/branch/lxml-2.2: . src/lxml src/lxml/tests Message-ID: <20091124180944.0B723168008@codespeak.net> Author: scoder Date: Tue Nov 24 19:09:43 2009 New Revision: 69609 Modified: lxml/branch/lxml-2.2/ (props changed) lxml/branch/lxml-2.2/CHANGES.txt lxml/branch/lxml-2.2/INSTALL.txt (props changed) lxml/branch/lxml-2.2/src/lxml/apihelpers.pxi lxml/branch/lxml-2.2/src/lxml/cleanup.pxi lxml/branch/lxml-2.2/src/lxml/tests/test_etree.py Log: trunk merge: fix strip_tags() and strip_elements() Modified: lxml/branch/lxml-2.2/CHANGES.txt ============================================================================== --- lxml/branch/lxml-2.2/CHANGES.txt (original) +++ lxml/branch/lxml-2.2/CHANGES.txt Tue Nov 24 19:09:43 2009 @@ -14,6 +14,9 @@ Bugs fixed ---------- +* The functions ``strip_tags()`` and ``strip_elements()`` in + ``lxml.etree`` did not remove all occurrences of a tag in all cases. + * Crash in XSLT extension elements when the XSLT context node is not an element. Modified: lxml/branch/lxml-2.2/src/lxml/apihelpers.pxi ============================================================================== --- lxml/branch/lxml-2.2/src/lxml/apihelpers.pxi (original) +++ lxml/branch/lxml-2.2/src/lxml/apihelpers.pxi Tue Nov 24 19:09:43 2009 @@ -395,6 +395,7 @@ return 0 cdef int _replaceNodeByChildren(_Document doc, xmlNode* c_node) except -1: + # NOTE: this does not deallocate the node, just unlink it! 
cdef xmlNode* c_parent cdef xmlNode* c_child if c_node.children is NULL: @@ -879,6 +880,35 @@ else: return 0 +cdef inline bint _tagMatchesExactly(xmlNode* c_node, char* c_href, char* c_name): + u"""Tests if the node matches namespace URI and tag name. + + This differs from _tagMatches() in that it does not consider a + NULL value in c_href a wildcard, and that it expects the c_name to + be taken from the doc dict, i.e. it only compares the names by + address. + + A node matches if it matches both c_href and c_name. + + A node matches c_href if any of the following is true: + * its namespace is NULL and c_href is the empty string + * its namespace string equals the c_href string + + A node matches c_name if any of the following is true: + * c_name is NULL + * its name string points to the same address (!) as c_name + """ + cdef char* c_node_href + if c_name is not NULL and c_name is not c_node.name: + return 0 + c_node_href = _getNs(c_node) + if c_href is NULL: + return c_node_href is NULL or c_node_href[0] == '\0' + elif c_node_href is NULL: + return 0 + else: + return cstd.strcmp(c_href, c_node_href) == 0 + cdef int _removeNode(_Document doc, xmlNode* c_node) except -1: u"""Unlink and free a node and subnodes if possible. Otherwise, make sure it's self-contained. 
@@ -892,6 +922,29 @@ moveNodeToDocument(doc, c_node.doc, c_node) return 0 +cdef int _removeSiblings(xmlNode* c_element, int node_type, bint with_tail) except -1: + cdef xmlNode* c_node + cdef xmlNode* c_next + c_node = c_element.next + while c_node is not NULL: + c_next = _nextElement(c_node) + if c_node.type == node_type: + if with_tail: + _removeText(c_node.next) + tree.xmlUnlinkNode(c_node) + attemptDeallocation(c_node) + c_node = c_next + c_node = c_element.prev + while c_node is not NULL: + c_next = _previousElement(c_node) + if c_node.type == node_type: + if with_tail: + _removeText(c_node.next) + tree.xmlUnlinkNode(c_node) + attemptDeallocation(c_node) + c_node = c_next + return 0 + cdef void _moveTail(xmlNode* c_tail, xmlNode* c_target): cdef xmlNode* c_next # tail support: look for any text nodes trailing this node and Modified: lxml/branch/lxml-2.2/src/lxml/cleanup.pxi ============================================================================== --- lxml/branch/lxml-2.2/src/lxml/cleanup.pxi (original) +++ lxml/branch/lxml-2.2/src/lxml/cleanup.pxi Tue Nov 24 19:09:43 2009 @@ -22,40 +22,56 @@ 'simpleattr', '{http://some/ns}attrname') """ - cdef xmlNode* c_node - cdef xmlAttr* c_attr cdef _Element element cdef list ns_tags - cdef char* c_name + cdef char** c_ns_tags + cdef Py_ssize_t c_tag_count element = _rootNodeOrRaise(tree_or_element) if not attribute_names: return ns_tags = _sortedTagList([ _getNsTag(attr) for attr in attribute_names ]) - ns_tags = [ (ns, tag if tag != '*' else None) + ns_tags = [ (ns, tag if tag != b'*' else None) for ns, tag in ns_tags ] - c_node = element._c_node + # tag names are passes as C pointers as this allows us to take + # them from the doc dict and do pointer comparisons + c_ns_tags = cstd.malloc(sizeof(char*) * len(ns_tags) * 2 + 2) + if c_ns_tags is NULL: + python.PyErr_NoMemory() + + try: + c_tag_count = _mapTagsToCharArray(element._doc._c_doc, ns_tags, c_ns_tags) + if c_tag_count > 0: + 
_strip_attributes(element._c_node, c_ns_tags, c_tag_count) + finally: + cstd.free(c_ns_tags) + +cdef _strip_attributes(xmlNode* c_node, char** c_ns_tags, Py_ssize_t c_tag_count): + cdef xmlAttr* c_attr + cdef Py_ssize_t i + cdef char* c_href + cdef char* c_name + tree.BEGIN_FOR_EACH_ELEMENT_FROM(c_node, c_node, 1) if c_node.type == tree.XML_ELEMENT_NODE: if c_node.properties is not NULL: - for ns, tag in ns_tags: - # must search attributes manually to make sure we only - # match on blank tag names if there is no namespace - c_name = NULL if tag is None else _cstr(tag) + for i in xrange(c_tag_count): + c_href = c_ns_tags[2*i] + c_name = c_ns_tags[2*i+1] + # must compare attributes manually to make sure we + # only match on wildcard tag names if the attribute + # has no namespace c_attr = c_node.properties while c_attr is not NULL: - if ns is None: - if c_attr.ns is NULL or c_attr.ns.href is NULL: - if c_name is NULL or \ - cstd.strcmp(c_attr.name, c_name) == 0: + if c_name is NULL or c_attr.name == c_name: + if c_href is NULL: + if c_attr.ns is NULL or c_attr.ns.href is NULL: tree.xmlRemoveProp(c_attr) break - elif c_attr.ns is not NULL and c_attr.ns.href is not NULL: - if cstd.strcmp(c_attr.ns.href, _cstr(ns)) == 0: - if c_name is NULL or \ - cstd.strcmp(c_attr.name, c_name) == 0: + elif c_attr.ns is not NULL and c_attr.ns.href is not NULL: + if cstd.strcmp(c_attr.ns.href, c_href) == 0: tree.xmlRemoveProp(c_attr) break c_attr = c_attr.next @@ -84,14 +100,11 @@ Comment # comments ) """ - cdef xmlNode* c_node - cdef xmlNode* c_child - cdef xmlNode* c_next - cdef char* c_href - cdef char* c_name cdef _Element element cdef _Document doc cdef list ns_tags + cdef char** c_ns_tags + cdef Py_ssize_t c_tag_count cdef bint strip_comments, strip_pis, strip_entities doc = _documentOrRaise(tree_or_element) @@ -101,7 +114,35 @@ ns_tags = _filterSpecialTagNames( tag_names, &strip_comments, &strip_pis, &strip_entities) - c_node = element._c_node + # tag names are passes as C 
pointers as this allows us to take + # them from the doc dict and do pointer comparisons + c_ns_tags = cstd.malloc(sizeof(char*) * len(ns_tags) * 2 + 2) + if c_ns_tags is NULL: + python.PyErr_NoMemory() + + if (strip_comments or strip_pis) and isinstance(tree_or_element, _ElementTree): + # include PIs and comments next to the root node + if strip_comments: + _removeSiblings(element._c_node, tree.XML_COMMENT_NODE, with_tail) + if strip_pis: + _removeSiblings(element._c_node, tree.XML_PI_NODE, with_tail) + + try: + c_tag_count = _mapTagsToCharArray(doc._c_doc, ns_tags, c_ns_tags) + if c_tag_count > 0 or strip_comments or strip_pis or strip_entities: + _strip_elements(doc, element._c_node, c_ns_tags, c_tag_count, + strip_comments, strip_pis, strip_entities, with_tail) + finally: + cstd.free(c_ns_tags) + +cdef _strip_elements(_Document doc, xmlNode* c_node, + char** c_ns_tags, Py_ssize_t c_tag_count, + bint strip_comments, bint strip_pis, bint strip_entities, + bint with_tail): + cdef xmlNode* c_child + cdef xmlNode* c_next + cdef Py_ssize_t i + tree.BEGIN_FOR_EACH_ELEMENT_FROM(c_node, c_node, 1) if c_node.type == tree.XML_ELEMENT_NODE: # we run through the children here to prevent any problems @@ -109,39 +150,25 @@ # c_node itself c_child = _findChildForwards(c_node, 0) while c_child is not NULL: + c_next = _nextElement(c_child) if c_child.type == tree.XML_ELEMENT_NODE: - for ns, tag in ns_tags: - if ns is None: - # _tagMatches() considers NULL a wildcard - # match but we don't - if c_child.ns is not NULL and c_child.ns.href is not NULL: - continue - c_href = NULL - else: - c_href = _cstr(ns) - c_name = NULL if tag is None else _cstr(tag) - if _tagMatches(c_child, c_href, c_name): - c_next = _nextElement(c_child) + for i in xrange(c_tag_count): + if _tagMatchesExactly(c_child, c_ns_tags[2*i], c_ns_tags[2*i+1]): if not with_tail: tree.xmlUnlinkNode(c_child) _removeNode(doc, c_child) - c_child = c_next break - else: - c_child = _nextElement(c_child) - elif 
strip_comments and c_child.type == tree.XML_COMMENT_NODE or \ - strip_pis and c_child.type == tree.XML_PI_NODE or \ - strip_entities and c_child.type == tree.XML_ENTITY_REF_NODE: - c_next = _nextElement(c_child) + elif c_child.type == tree.XML_COMMENT_NODE and strip_comments \ + or c_child.type == tree.XML_PI_NODE and strip_pis \ + or c_child.type == tree.XML_ENTITY_REF_NODE and strip_entities: if with_tail: - _removeText(c_next) + _removeText(c_child.next) tree.xmlUnlinkNode(c_child) attemptDeallocation(c_child) - c_child = c_next - else: - c_child = _nextElement(c_child) + c_child = c_next tree.END_FOR_EACH_ELEMENT_FROM(c_node) + def strip_tags(tree_or_element, *tag_names): u"""strip_tags(tree_or_element, *tag_names) @@ -164,15 +191,12 @@ Comment # comments (including their text!) ) """ - cdef xmlNode* c_node - cdef xmlNode* c_child - cdef xmlNode* c_next - cdef char* c_href - cdef char* c_name cdef _Element element cdef _Document doc cdef list ns_tags cdef bint strip_comments, strip_pis, strip_entities + cdef char** c_ns_tags + cdef Py_ssize_t c_tag_count doc = _documentOrRaise(tree_or_element) element = _rootNodeOrRaise(tree_or_element) @@ -181,7 +205,34 @@ ns_tags = _filterSpecialTagNames( tag_names, &strip_comments, &strip_pis, &strip_entities) - c_node = element._c_node + # tag names are passes as C pointers as this allows us to take + # them from the doc dict and do pointer comparisons + c_ns_tags = cstd.malloc(sizeof(char*) * len(ns_tags) * 2 + 2) + if c_ns_tags is NULL: + python.PyErr_NoMemory() + + if (strip_comments or strip_pis) and isinstance(tree_or_element, _ElementTree): + # include PIs and comments next to the root node + if strip_comments: + _removeSiblings(element._c_node, tree.XML_COMMENT_NODE, 0) + if strip_pis: + _removeSiblings(element._c_node, tree.XML_PI_NODE, 0) + + try: + c_tag_count = _mapTagsToCharArray(doc._c_doc, ns_tags, c_ns_tags) + if c_tag_count > 0 or strip_comments or strip_pis or strip_entities: + _strip_tags(doc, 
element._c_node, c_ns_tags, c_tag_count, + strip_comments, strip_pis, strip_entities) + finally: + cstd.free(c_ns_tags) + +cdef _strip_tags(_Document doc, xmlNode* c_node, + char** c_ns_tags, Py_ssize_t c_tag_count, + bint strip_comments, bint strip_pis, bint strip_entities): + cdef xmlNode* c_child + cdef xmlNode* c_next + cdef Py_ssize_t i + tree.BEGIN_FOR_EACH_ELEMENT_FROM(c_node, c_node, 1) if c_node.type == tree.XML_ELEMENT_NODE: # we run through the children here to prevent any problems @@ -190,39 +241,26 @@ c_child = _findChildForwards(c_node, 0) while c_child is not NULL: if c_child.type == tree.XML_ELEMENT_NODE: - for ns, tag in ns_tags: - if ns is None: - # _tagMatches() considers NULL a wildcard - # match but we don't - if c_child.ns is not NULL and c_child.ns.href is not NULL: - continue - c_href = NULL - else: - c_href = _cstr(ns) - c_name = NULL if tag is None else _cstr(tag) - if _tagMatches(c_child, c_href, c_name): - if c_child.children is not NULL: - c_next = _findChildForwards(c_child, 0) - else: - c_next = _nextElement(c_child) + for i in xrange(c_tag_count): + if _tagMatchesExactly(c_child, c_ns_tags[2*i], c_ns_tags[2*i+1]): + c_next = _findChildForwards(c_child, 0) or _nextElement(c_child) _replaceNodeByChildren(doc, c_child) if not attemptDeallocation(c_child): - if c_child.ns is not NULL: + if c_child.nsDef is not NULL: # make namespaces absolute moveNodeToDocument(doc, doc._c_doc, c_child) c_child = c_next break else: - c_child = c_child.next - elif strip_comments and c_child.type == tree.XML_COMMENT_NODE or \ - strip_pis and c_child.type == tree.XML_PI_NODE or \ - strip_entities and c_child.type == tree.XML_ENTITY_REF_NODE: + c_child = _nextElement(c_child) + else: c_next = _nextElement(c_child) - tree.xmlUnlinkNode(c_child) - attemptDeallocation(c_child) + if c_child.type == tree.XML_COMMENT_NODE and strip_comments \ + or c_child.type == tree.XML_PI_NODE and strip_pis \ + or c_child.type == tree.XML_ENTITY_REF_NODE and strip_entities: + 
tree.xmlUnlinkNode(c_child) + attemptDeallocation(c_child) c_child = c_next - else: - c_child = _nextElement(c_child) tree.END_FOR_EACH_ELEMENT_FROM(c_node) @@ -235,7 +273,7 @@ cdef list decorated_list cdef tuple ns_tag cdef Py_ssize_t i - decorated_list = [ (ns_tag[0] or '', ns_tag[1], i, ns_tag) + decorated_list = [ (ns_tag[0] or b'', ns_tag[1], i, ns_tag) for i, ns_tag in enumerate(l) ] decorated_list.sort() return [ item[-1] for item in decorated_list ] @@ -246,18 +284,38 @@ pis[0] = 0 entities[0] = 0 - if Comment in tag_names: - comments[0] = 1 - tag_names = [ tag for tag in tag_names - if tag is not Comment ] - if ProcessingInstruction in tag_names: - pis[0] = 1 - tag_names = [ tag for tag in tag_names - if tag is not ProcessingInstruction ] - if Entity in tag_names: - entities[0] = 1 - tag_names = [ tag for tag in tag_names - if tag is not Entity ] - ns_tags = _sortedTagList([ _getNsTag(tag) for tag in tag_names ]) + ns_tags = [] + for tag in tag_names: + if tag is Comment: + comments[0] = 1 + elif tag is ProcessingInstruction: + pis[0] = 1 + elif tag is Entity: + entities[0] = 1 + else: + ns_tags.append(_getNsTag(tag)) + return [ (ns, tag if tag != '*' else None) - for ns, tag in ns_tags ] + for ns, tag in _sortedTagList(ns_tags) ] + +cdef Py_ssize_t _mapTagsToCharArray(xmlDoc* c_doc, list ns_tags, + char** c_ns_tags) except -1: + cdef Py_ssize_t count = 0 + cdef char* c_tag + for ns, tag in ns_tags: + if ns is None: + c_ns_tags[0] = NULL + else: + c_ns_tags[0] = _cstr(ns) + if tag is None: + c_ns_tags[1] = NULL + else: + c_tag = _cstr(tag) + c_ns_tags[1] = tree.xmlDictExists( + c_doc.dict, c_tag, cstd.strlen(c_tag)) + if c_ns_tags[1] == NULL: + # not in the dict => not in the document + continue + c_ns_tags += 2 + count += 1 + return count Modified: lxml/branch/lxml-2.2/src/lxml/tests/test_etree.py ============================================================================== --- lxml/branch/lxml-2.2/src/lxml/tests/test_etree.py (original) +++ 
lxml/branch/lxml-2.2/src/lxml/tests/test_etree.py Tue Nov 24 19:09:43 2009 @@ -7,7 +7,14 @@ test_elementtree """ -import os.path, unittest, copy, sys, operator, tempfile, gzip +import os.path +import unittest +import copy +import sys +import re +import operator +import tempfile +import gzip this_dir = os.path.dirname(__file__) if this_dir not in sys.path: @@ -312,6 +319,85 @@ self.assertEquals(_bytes('TESTABCTBTATXABTCTATXT'), self._writeElement(root)) + def test_strip_tags_pi_comment(self): + XML = self.etree.XML + PI = self.etree.ProcessingInstruction + Comment = self.etree.Comment + xml = _bytes('\n\nTESTXT\n\n') + + root = XML(xml) + self.etree.strip_tags(root, PI) + self.assertEquals(_bytes('\n\nTESTXT\n\n'), + self._writeElement(root)) + + root = XML(xml) + self.etree.strip_tags(root, Comment) + self.assertEquals(_bytes('\n\nTESTXT\n\n'), + self._writeElement(root)) + + root = XML(xml) + self.etree.strip_tags(root, PI, Comment) + self.assertEquals(_bytes('\n\nTESTXT\n\n'), + self._writeElement(root)) + + root = XML(xml) + self.etree.strip_tags(root, Comment, PI) + self.assertEquals(_bytes('\n\nTESTXT\n\n'), + self._writeElement(root)) + + def test_strip_tags_pi_comment_all(self): + XML = self.etree.XML + ElementTree = self.etree.ElementTree + PI = self.etree.ProcessingInstruction + Comment = self.etree.Comment + xml = _bytes('\n\nTESTXT\n\n') + + root = XML(xml) + self.etree.strip_tags(ElementTree(root), PI) + self.assertEquals(_bytes('\nTESTXT\n'), + self._writeElement(root)) + + root = XML(xml) + self.etree.strip_tags(ElementTree(root), Comment) + self.assertEquals(_bytes('\nTESTXT\n'), + self._writeElement(root)) + + root = XML(xml) + self.etree.strip_tags(ElementTree(root), PI, Comment) + self.assertEquals(_bytes('TESTXT'), + self._writeElement(root)) + + root = XML(xml) + self.etree.strip_tags(ElementTree(root), Comment, PI) + self.assertEquals(_bytes('TESTXT'), + self._writeElement(root)) + + def test_strip_tags_doc_style(self): + XML = self.etree.XML + 
xml = _bytes(''' +
+
+ I like sheep. +
+ I like lots of sheep. +
+ Click here for those sheep. +
+
+
+ '''.strip()) + + root = XML(xml) + self.etree.strip_tags(root, 'a') + self.assertEquals(re.sub(_bytes(']*>'), '', xml).replace('
', '

'), + self._writeElement(root)) + + root = XML(xml) + self.etree.strip_tags(root, 'a', 'br') + self.assertEquals(re.sub(_bytes(']*>'), '', + re.sub(_bytes(']*>'), '', xml)), + self._writeElement(root)) + def test_strip_tags_ns(self): XML = self.etree.XML xml = _bytes('TESTABCTBTATXABTCTATXT') From scoder at codespeak.net Tue Nov 24 21:39:02 2009 From: scoder at codespeak.net (scoder at codespeak.net) Date: Tue, 24 Nov 2009 21:39:02 +0100 (CET) Subject: [Lxml-checkins] r69616 - in lxml/trunk: . src/lxml Message-ID: <20091124203902.53EE4168021@codespeak.net> Author: scoder Date: Tue Nov 24 21:39:01 2009 New Revision: 69616 Modified: lxml/trunk/ (props changed) lxml/trunk/src/lxml/cleanup.pxi Log: r5351 at delle: sbehnel | 2009-11-24 21:38:56 +0100 prevent memory leaks on errors Modified: lxml/trunk/src/lxml/cleanup.pxi ============================================================================== --- lxml/trunk/src/lxml/cleanup.pxi (original) +++ lxml/trunk/src/lxml/cleanup.pxi Tue Nov 24 21:39:01 2009 @@ -114,12 +114,6 @@ ns_tags = _filterSpecialTagNames( tag_names, &strip_comments, &strip_pis, &strip_entities) - # tag names are passes as C pointers as this allows us to take - # them from the doc dict and do pointer comparisons - c_ns_tags = cstd.malloc(sizeof(char*) * len(ns_tags) * 2 + 2) - if c_ns_tags is NULL: - python.PyErr_NoMemory() - if (strip_comments or strip_pis) and isinstance(tree_or_element, _ElementTree): # include PIs and comments next to the root node if strip_comments: @@ -127,6 +121,12 @@ if strip_pis: _removeSiblings(element._c_node, tree.XML_PI_NODE, with_tail) + # tag names are passes as C pointers as this allows us to take + # them from the doc dict and do pointer comparisons + c_ns_tags = cstd.malloc(sizeof(char*) * len(ns_tags) * 2 + 2) + if c_ns_tags is NULL: + python.PyErr_NoMemory() + try: c_tag_count = _mapTagsToCharArray(doc._c_doc, ns_tags, c_ns_tags) if c_tag_count > 0 or strip_comments or strip_pis or strip_entities: @@ -205,12 
+205,6 @@ ns_tags = _filterSpecialTagNames( tag_names, &strip_comments, &strip_pis, &strip_entities) - # tag names are passes as C pointers as this allows us to take - # them from the doc dict and do pointer comparisons - c_ns_tags = cstd.malloc(sizeof(char*) * len(ns_tags) * 2 + 2) - if c_ns_tags is NULL: - python.PyErr_NoMemory() - if (strip_comments or strip_pis) and isinstance(tree_or_element, _ElementTree): # include PIs and comments next to the root node if strip_comments: @@ -218,6 +212,12 @@ if strip_pis: _removeSiblings(element._c_node, tree.XML_PI_NODE, 0) + # tag names are passes as C pointers as this allows us to take + # them from the doc dict and do pointer comparisons + c_ns_tags = cstd.malloc(sizeof(char*) * len(ns_tags) * 2 + 2) + if c_ns_tags is NULL: + python.PyErr_NoMemory() + try: c_tag_count = _mapTagsToCharArray(doc._c_doc, ns_tags, c_ns_tags) if c_tag_count > 0 or strip_comments or strip_pis or strip_entities: From scoder at codespeak.net Tue Nov 24 21:40:44 2009 From: scoder at codespeak.net (scoder at codespeak.net) Date: Tue, 24 Nov 2009 21:40:44 +0100 (CET) Subject: [Lxml-checkins] r69617 - in lxml/branch/lxml-2.2: . 
src/lxml Message-ID: <20091124204044.83562168021@codespeak.net> Author: scoder Date: Tue Nov 24 21:40:43 2009 New Revision: 69617 Modified: lxml/branch/lxml-2.2/ (props changed) lxml/branch/lxml-2.2/INSTALL.txt (props changed) lxml/branch/lxml-2.2/src/lxml/cleanup.pxi Log: trunk merge: fix memory leak in error case Modified: lxml/branch/lxml-2.2/src/lxml/cleanup.pxi ============================================================================== --- lxml/branch/lxml-2.2/src/lxml/cleanup.pxi (original) +++ lxml/branch/lxml-2.2/src/lxml/cleanup.pxi Tue Nov 24 21:40:43 2009 @@ -114,12 +114,6 @@ ns_tags = _filterSpecialTagNames( tag_names, &strip_comments, &strip_pis, &strip_entities) - # tag names are passes as C pointers as this allows us to take - # them from the doc dict and do pointer comparisons - c_ns_tags = cstd.malloc(sizeof(char*) * len(ns_tags) * 2 + 2) - if c_ns_tags is NULL: - python.PyErr_NoMemory() - if (strip_comments or strip_pis) and isinstance(tree_or_element, _ElementTree): # include PIs and comments next to the root node if strip_comments: @@ -127,6 +121,12 @@ if strip_pis: _removeSiblings(element._c_node, tree.XML_PI_NODE, with_tail) + # tag names are passes as C pointers as this allows us to take + # them from the doc dict and do pointer comparisons + c_ns_tags = cstd.malloc(sizeof(char*) * len(ns_tags) * 2 + 2) + if c_ns_tags is NULL: + python.PyErr_NoMemory() + try: c_tag_count = _mapTagsToCharArray(doc._c_doc, ns_tags, c_ns_tags) if c_tag_count > 0 or strip_comments or strip_pis or strip_entities: @@ -205,12 +205,6 @@ ns_tags = _filterSpecialTagNames( tag_names, &strip_comments, &strip_pis, &strip_entities) - # tag names are passes as C pointers as this allows us to take - # them from the doc dict and do pointer comparisons - c_ns_tags = cstd.malloc(sizeof(char*) * len(ns_tags) * 2 + 2) - if c_ns_tags is NULL: - python.PyErr_NoMemory() - if (strip_comments or strip_pis) and isinstance(tree_or_element, _ElementTree): # include PIs and comments 
next to the root node if strip_comments: @@ -218,6 +212,12 @@ if strip_pis: _removeSiblings(element._c_node, tree.XML_PI_NODE, 0) + # tag names are passes as C pointers as this allows us to take + # them from the doc dict and do pointer comparisons + c_ns_tags = cstd.malloc(sizeof(char*) * len(ns_tags) * 2 + 2) + if c_ns_tags is NULL: + python.PyErr_NoMemory() + try: c_tag_count = _mapTagsToCharArray(doc._c_doc, ns_tags, c_ns_tags) if c_tag_count > 0 or strip_comments or strip_pis or strip_entities: