From ianb at codespeak.net Fri Feb 1 02:38:31 2008
From: ianb at codespeak.net (ianb at codespeak.net)
Date: Fri, 1 Feb 2008 02:38:31 +0100 (CET)
Subject: [Lxml-checkins] r51162 - lxml/trunk/src/lxml/html
Message-ID: <20080201013831.C61DD1684D6@codespeak.net>
Author: ianb
Date: Fri Feb 1 02:38:30 2008
New Revision: 51162
Added:
lxml/trunk/src/lxml/html/_dictmixin.py (contents, props changed)
Log:
Add DictMixin backport
Added: lxml/trunk/src/lxml/html/_dictmixin.py
==============================================================================
--- (empty file)
+++ lxml/trunk/src/lxml/html/_dictmixin.py Fri Feb 1 02:38:30 2008
@@ -0,0 +1,102 @@
+"""
+A backport of UserDict.DictMixin for pre-python-2.4
+"""
+__all__ = ['DictMixin']
+
+try:
+ from UserDict import DictMixin
+except ImportError:
+ class DictMixin:
+ # Mixin defining all dictionary methods for classes that already have
+ # a minimum dictionary interface including getitem, setitem, delitem,
+ # and keys. Without knowledge of the subclass constructor, the mixin
+ # does not define __init__() or copy(). In addition to the four base
+ # methods, progressively more efficiency comes with defining
+ # __contains__(), __iter__(), and iteritems().
+
+ # second level definitions support higher levels
+ def __iter__(self):
+ for k in self.keys():
+ yield k
+ def has_key(self, key):
+ try:
+ value = self[key]
+ except KeyError:
+ return False
+ return True
+ def __contains__(self, key):
+ return self.has_key(key)
+
+ # third level takes advantage of second level definitions
+ def iteritems(self):
+ for k in self:
+ yield (k, self[k])
+ def iterkeys(self):
+ return self.__iter__()
+
+ # fourth level uses definitions from lower levels
+ def itervalues(self):
+ for _, v in self.iteritems():
+ yield v
+ def values(self):
+ return [v for _, v in self.iteritems()]
+ def items(self):
+ return list(self.iteritems())
+ def clear(self):
+ for key in self.keys():
+ del self[key]
+ def setdefault(self, key, default=None):
+ try:
+ return self[key]
+ except KeyError:
+ self[key] = default
+ return default
+ def pop(self, key, *args):
+ if len(args) > 1:
+ raise TypeError, "pop expected at most 2 arguments, got "\
+ + repr(1 + len(args))
+ try:
+ value = self[key]
+ except KeyError:
+ if args:
+ return args[0]
+ raise
+ del self[key]
+ return value
+ def popitem(self):
+ try:
+ k, v = self.iteritems().next()
+ except StopIteration:
+ raise KeyError, 'container is empty'
+ del self[k]
+ return (k, v)
+ def update(self, other=None, **kwargs):
+ # Make progressively weaker assumptions about "other"
+ if other is None:
+ pass
+ elif hasattr(other, 'iteritems'): # iteritems saves memory and lookups
+ for k, v in other.iteritems():
+ self[k] = v
+ elif hasattr(other, 'keys'):
+ for k in other.keys():
+ self[k] = other[k]
+ else:
+ for k, v in other:
+ self[k] = v
+ if kwargs:
+ self.update(kwargs)
+ def get(self, key, default=None):
+ try:
+ return self[key]
+ except KeyError:
+ return default
+ def __repr__(self):
+ return repr(dict(self.iteritems()))
+ def __cmp__(self, other):
+ if other is None:
+ return 1
+ if isinstance(other, DictMixin):
+ other = dict(other.iteritems())
+ return cmp(dict(self.iteritems()), other)
+ def __len__(self):
+ return len(self.keys())
From scoder at codespeak.net Fri Feb 1 12:34:14 2008
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Fri, 1 Feb 2008 12:34:14 +0100 (CET)
Subject: [Lxml-checkins] r51168 - in lxml/trunk: . doc
Message-ID: <20080201113414.C2E6E1684CB@codespeak.net>
Author: scoder
Date: Fri Feb 1 12:34:13 2008
New Revision: 51168
Modified:
lxml/trunk/ (props changed)
lxml/trunk/doc/FAQ.txt
Log:
r3373 at delle: sbehnel | 2008-02-01 07:40:54 +0100
separate out Zope in users list
Modified: lxml/trunk/doc/FAQ.txt
==============================================================================
--- lxml/trunk/doc/FAQ.txt (original)
+++ lxml/trunk/doc/FAQ.txt Fri Feb 1 12:34:13 2008
@@ -120,15 +120,19 @@
* cssutils_, a CSS parser and toolkit, can be used with ``lxml.cssselect``
* Deliverance_, a content theming tool
-* gocept.lxml_, Zope3 interface bindings for lxml
* Inteproxy_, a secure HTTP proxy
* lwebstring_, an XML template engine
* OpenXMLlib_, a library for handling OpenXML document meta data
* Pycoon_, a WSGI web development framework based on XML pipelines
* rfadict_, an RDFa parser with a simple dictionary-like interface.
-And a couple of generally happy_ users_, and other `sites that link to
-lxml`_.
+Zope3 and some of its extensions have good support for lxml:
+
+* gocept.lxml_, Zope3 interface bindings for lxml
+* z3c.rml_, an implementation of ReportLab's RML format
+
+And don't miss the quotes by our generally happy_ users_, and other
+`sites that link to lxml`_.
.. _cssutils: http://code.google.com/p/cssutils/source/browse/trunk/examples/style.py?r=917
.. _Deliverance: http://www.openplans.org/projects/deliverance/project-home
@@ -138,6 +142,7 @@
.. _OpenXMLlib: http://permalink.gmane.org/gmane.comp.python.lxml.devel/3250
.. _Pycoon: http://pypi.python.org/pypi/pycoon
.. _rfadict: http://pypi.python.org/pypi/rdfadict
+.. _z3c.rml: http://pypi.python.org/pypi/z3c.rml
.. _happy: http://thread.gmane.org/gmane.comp.python.lxml.devel/3244/focus=3244
.. _users: http://article.gmane.org/gmane.comp.python.lxml.devel/3246
From scoder at codespeak.net Fri Feb 1 12:34:18 2008
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Fri, 1 Feb 2008 12:34:18 +0100 (CET)
Subject: [Lxml-checkins] r51169 - lxml/trunk
Message-ID: <20080201113418.923DD1684CC@codespeak.net>
Author: scoder
Date: Fri Feb 1 12:34:17 2008
New Revision: 51169
Modified:
lxml/trunk/ (props changed)
lxml/trunk/Makefile
Log:
r3374 at delle: sbehnel | 2008-02-01 10:16:04 +0100
fixed benchmark run from Makefile
Modified: lxml/trunk/Makefile
==============================================================================
--- lxml/trunk/Makefile (original)
+++ lxml/trunk/Makefile Fri Feb 1 12:34:17 2008
@@ -25,7 +25,10 @@
$(PYTHON) test.py
bench_inplace: inplace
- $(PYTHON) bench.py -i
+ $(PYTHON) benchmark/bench_etree.py -i
+ $(PYTHON) benchmark/bench_xpath.py -i
+ $(PYTHON) benchmark/bench_xslt.py -i
+ $(PYTHON) benchmark/bench_objectify.py -i
ftest_build: build
$(PYTHON) test.py -f $(TESTFLAGS) $(TESTOPTS)
From scoder at codespeak.net Fri Feb 1 12:34:22 2008
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Fri, 1 Feb 2008 12:34:22 +0100 (CET)
Subject: [Lxml-checkins] r51170 - in lxml/trunk: . doc
Message-ID: <20080201113422.C96BA1684CF@codespeak.net>
Author: scoder
Date: Fri Feb 1 12:34:22 2008
New Revision: 51170
Modified:
lxml/trunk/ (props changed)
lxml/trunk/doc/tutorial.txt
Log:
r3375 at delle: sbehnel | 2008-02-01 11:40:10 +0100
tutorial update: mention parsing from URLs
Modified: lxml/trunk/doc/tutorial.txt
==============================================================================
--- lxml/trunk/doc/tutorial.txt (original)
+++ lxml/trunk/doc/tutorial.txt Fri Feb 1 12:34:22 2008
@@ -546,11 +546,12 @@
Parsing from strings and files
==============================
-``lxml.etree`` supports parsing XML in a number of ways and from all important
-sources, namely strings, files and file-like objects. The main parse
-functions are ``fromstring()`` and ``parse()``, both called with the source as
-first argument. By default, they use the standard parser, but you can always
-pass a different parser as second argument.
+``lxml.etree`` supports parsing XML in a number of ways and from all
+important sources, namely strings, files, URLs (http/ftp) and
+file-like objects. The main parse functions are ``fromstring()`` and
+``parse()``, both called with the source as first argument. By
+default, they use the standard parser, but you can always pass a
+different parser as second argument.
The fromstring() function
@@ -601,6 +602,20 @@
>>> print etree.tostring(root)
<root>data</root>
+The ``parse()`` function supports any of the following sources:
+
+* an open file object
+
+* a file-like object that has a ``.read(byte_count)`` method returning
+ a byte string on each call
+
+* a filename string
+
+* an HTTP or FTP URL string
+
+Note that passing a filename or URL is usually faster than passing an
+open file.
+
Parser objects
--------------
From scoder at codespeak.net Fri Feb 1 12:34:26 2008
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Fri, 1 Feb 2008 12:34:26 +0100 (CET)
Subject: [Lxml-checkins] r51171 - in lxml/trunk: . doc
Message-ID: <20080201113426.E21A31684D6@codespeak.net>
Author: scoder
Date: Fri Feb 1 12:34:26 2008
New Revision: 51171
Modified:
lxml/trunk/ (props changed)
lxml/trunk/doc/FAQ.txt
Log:
r3376 at delle: sbehnel | 2008-02-01 11:40:55 +0100
FAQ update: Cython is *almost* Python
Modified: lxml/trunk/doc/FAQ.txt
==============================================================================
--- lxml/trunk/doc/FAQ.txt (original)
+++ lxml/trunk/doc/FAQ.txt Fri Feb 1 12:34:26 2008
@@ -320,19 +320,23 @@
Why is lxml not written in Python?
----------------------------------
-lxml interfaces with two C libraries: libxml2 and libxslt. Accessing them at
-the C-level is required for performance reasons.
+It *almost* is.
-To avoid writing plain C-code and caring too much about the details of
-built-in types and reference counting, lxml is written in Cython_, a
-Python-like language that is translated into C-code. Chances are that if you
-know Python, you can write `code that Cython accepts`_. Again, the C-ish style
-used in the lxml code is just for performance optimisations. If you want to
-contribute, don't bother with the details, a Python implementation of your
-contribution is better than none. And keep in mind that lxml's flexible API
-often favours an implementation of features in pure Python, without bothering
-with C-code at all. For example, the ``lxml.html`` package is entirely written
-in Python.
+lxml is not written in plain Python, because it interfaces with two C
+libraries: libxml2 and libxslt. Accessing them at the C-level is
+required for performance reasons.
+
+However, to avoid writing plain C-code and caring too much about the
+details of built-in types and reference counting, lxml is written in
+Cython_, a Python-like language that is translated into C-code.
+Chances are that if you know Python, you can write `code that Cython
+accepts`_. Again, the C-ish style used in the lxml code is just for
+performance optimisations. If you want to contribute, don't bother
+with the details, a Python implementation of your contribution is
+better than none. And keep in mind that lxml's flexible API often
+favours an implementation of features in pure Python, without
+bothering with C-code at all. For example, the ``lxml.html`` package
+is entirely written in Python.
Please contact the `mailing list`_ if you need any help.
From scoder at codespeak.net Fri Feb 1 12:34:31 2008
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Fri, 1 Feb 2008 12:34:31 +0100 (CET)
Subject: [Lxml-checkins] r51172 - in lxml/trunk: . doc doc/html
Message-ID: <20080201113431.D05C41684CB@codespeak.net>
Author: scoder
Date: Fri Feb 1 12:34:30 2008
New Revision: 51172
Modified:
lxml/trunk/ (props changed)
lxml/trunk/doc/html/style.css
lxml/trunk/doc/main.txt
Log:
r3377 at delle: sbehnel | 2008-02-01 12:33:19 +0100
added page quote to project page
Modified: lxml/trunk/doc/html/style.css
==============================================================================
--- lxml/trunk/doc/html/style.css (original)
+++ lxml/trunk/doc/html/style.css Fri Feb 1 12:34:30 2008
@@ -205,12 +205,31 @@
font-style: italic;
}
-div.line-block {
+div.eyecatcher {
font-family: Times, "Times New Roman", serif;
text-align: center;
font-size: 140%;
}
+div.pagequote {
+ position: absolute;
+ top: 0px;
+ right: 0px;
+ padding: 10px 10px 0 0;
+ text-align: right;
+ font-size: 80%;
+ color: #990000;
+}
+
+div.pagequote .reference {
+ font-size: 140%;
+}
+
+html > .pagequote {
+ /* ignored by IE -> everyone else knows 'fixed', right? */
+ position: fixed;
+}
+
code {
color: Black;
background-color: #cccccc;
Modified: lxml/trunk/doc/main.txt
==============================================================================
--- lxml/trunk/doc/main.txt (original)
+++ lxml/trunk/doc/main.txt Fri Feb 1 12:34:30 2008
@@ -5,11 +5,18 @@
:description: lxml - the most feature-rich and easy-to-use library for working with XML and HTML in the Python language
:keywords: lxml, etree, objectify, Python, XML, HTML
+.. class:: eyecatcher
+
| lxml is the most feature-rich
| and easy-to-use library
| for working with XML and HTML
| in the Python language.
+.. class:: pagequote
+
+| `? lxml takes all the pain out of XML. ? `_
+| Stephan Richter
+
..
1 Introduction
2 Documentation
From scoder at codespeak.net Fri Feb 1 12:34:35 2008
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Fri, 1 Feb 2008 12:34:35 +0100 (CET)
Subject: [Lxml-checkins] r51173 - in lxml/trunk: . doc
Message-ID: <20080201113435.182AE1684CC@codespeak.net>
Author: scoder
Date: Fri Feb 1 12:34:34 2008
New Revision: 51173
Modified:
lxml/trunk/ (props changed)
lxml/trunk/doc/tutorial.txt
Log:
r3378 at delle: sbehnel | 2008-02-01 12:33:42 +0100
explain different return values of fromstring() and parse()
Modified: lxml/trunk/doc/tutorial.txt
==============================================================================
--- lxml/trunk/doc/tutorial.txt (original)
+++ lxml/trunk/doc/tutorial.txt Fri Feb 1 12:34:34 2008
@@ -602,6 +602,10 @@
>>> print etree.tostring(root)
<root>data</root>
+The reasoning behind this difference is that ``parse()`` returns a
+complete document from a file, while the string parsing functions are
+commonly used to parse XML fragments.
+
The ``parse()`` function supports any of the following sources:
* an open file object
From scoder at codespeak.net Fri Feb 1 14:00:45 2008
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Fri, 1 Feb 2008 14:00:45 +0100 (CET)
Subject: [Lxml-checkins] r51176 - in lxml/trunk: . doc
Message-ID: <20080201130045.8DBD1168471@codespeak.net>
Author: scoder
Date: Fri Feb 1 14:00:42 2008
New Revision: 51176
Modified:
lxml/trunk/ (props changed)
lxml/trunk/doc/validation.txt
Log:
r3387 at delle: sbehnel | 2008-02-01 13:58:46 +0100
skip catalog test
Modified: lxml/trunk/doc/validation.txt
==============================================================================
--- lxml/trunk/doc/validation.txt (original)
+++ lxml/trunk/doc/validation.txt Fri Feb 1 14:00:42 2008
@@ -71,9 +71,9 @@
If the validation fails (be it for a DTD or an XML schema), the parser
will raise an exception::
- >>> root = etree.fromstring("<a>not int</a>", parser)
+ >>> root = etree.fromstring("<a>no int</a>", parser)
Traceback (most recent call last):
- XMLSyntaxError: Element 'a': 'not int' is not a valid value of the atomic type 'xs:integer'.
+ XMLSyntaxError: Element 'a': 'no int' is not a valid value of the atomic type 'xs:integer'.
If you want the parser to succeed regardless of the outcome of the
validation, you should use a non validating parser and run the
@@ -111,15 +111,11 @@
:1:0:ERROR:VALID:DTD_NOT_EMPTY: Element b was declared EMPTY this one has content
As an alternative to parsing from a file, you can use the
-``external_id`` keyword argument to parse from a catalog::
+``external_id`` keyword argument to parse from a catalog. The
+following example reads the DocBook DTD in version 4.2, if available
+in the system catalog::
- >>> docbook = "-//OASIS//DTD DocBook XML V4.2//EN"
- >>> dtd = etree.DTD(external_id = docbook) # requires catalog support
-
- >>> root = etree.XML("")
- >>> dtd.assertValid(root) # doctest: +ELLIPSIS
- Traceback (most recent call last):
- DocumentInvalid: Element article content does not follow the DTD, ...
+ dtd = etree.DTD(external_id = "-//OASIS//DTD DocBook XML V4.2//EN")
RelaxNG
From scoder at codespeak.net Fri Feb 1 14:54:28 2008
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Fri, 1 Feb 2008 14:54:28 +0100 (CET)
Subject: [Lxml-checkins] r51180 - in lxml/trunk: . src/lxml
Message-ID: <20080201135428.5D11B1684C2@codespeak.net>
Author: scoder
Date: Fri Feb 1 14:54:27 2008
New Revision: 51180
Modified:
lxml/trunk/ (props changed)
lxml/trunk/src/lxml/dtd.pxi
Log:
r3390 at delle: sbehnel | 2008-02-01 14:52:59 +0100
handle unicode URLs in DTD(), just in case
Modified: lxml/trunk/src/lxml/dtd.pxi
==============================================================================
--- lxml/trunk/src/lxml/dtd.pxi (original)
+++ lxml/trunk/src/lxml/dtd.pxi Fri Feb 1 14:54:27 2008
@@ -31,7 +31,8 @@
self._c_dtd = NULL
_Validator.__init__(self)
if file is not None:
- if python._isString(file):
+ if _isString(file):
+ file = _encodeFilename(file)
self._error_log.connect()
self._c_dtd = xmlparser.xmlParseDTD(NULL, _cstr(file))
self._error_log.disconnect()
From scoder at codespeak.net Fri Feb 1 14:54:33 2008
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Fri, 1 Feb 2008 14:54:33 +0100 (CET)
Subject: [Lxml-checkins] r51181 - in lxml/trunk: . doc
Message-ID: <20080201135433.64C791684C9@codespeak.net>
Author: scoder
Date: Fri Feb 1 14:54:32 2008
New Revision: 51181
Modified:
lxml/trunk/ (props changed)
lxml/trunk/CHANGES.txt
lxml/trunk/doc/main.txt
lxml/trunk/version.txt
Log:
r3391 at delle: sbehnel | 2008-02-01 14:53:55 +0100
prepare release of 2.0
Modified: lxml/trunk/CHANGES.txt
==============================================================================
--- lxml/trunk/CHANGES.txt (original)
+++ lxml/trunk/CHANGES.txt Fri Feb 1 14:54:32 2008
@@ -2,8 +2,8 @@
lxml changelog
==============
-Under development
-=================
+2.0 (2008-02-01)
+================
Features added
--------------
Modified: lxml/trunk/doc/main.txt
==============================================================================
--- lxml/trunk/doc/main.txt (original)
+++ lxml/trunk/doc/main.txt Fri Feb 1 14:54:32 2008
@@ -5,6 +5,11 @@
:description: lxml - the most feature-rich and easy-to-use library for working with XML and HTML in the Python language
:keywords: lxml, etree, objectify, Python, XML, HTML
+.. class:: pagequote
+
+| `? lxml takes all the pain out of XML. ? `_
+| Stephan Richter
+
.. class:: eyecatcher
| lxml is the most feature-rich
@@ -12,11 +17,6 @@
| for working with XML and HTML
| in the Python language.
-.. class:: pagequote
-
-| `? lxml takes all the pain out of XML. ? `_
-| Stephan Richter
-
..
1 Introduction
2 Documentation
@@ -44,10 +44,6 @@
.. _FAQ: FAQ.html
-**This page describes the current in-development version of lxml that will
-become lxml 2.0.**
-
-
Documentation
-------------
@@ -149,8 +145,8 @@
.. _`lxml at the Python Package Index`: http://pypi.python.org/pypi/lxml/
.. _`this key`: pubkey.asc
-The latest version is `lxml 2.0beta2`_, released 2008-01-26
-(`changes for 2.0beta2`_). `Older versions`_ are listed below.
+The latest version is `lxml 2.0`_, released 2008-02-01
+(`changes for 2.0`_). `Older versions`_ are listed below.
.. _`Older versions`: #old-versions
@@ -210,6 +206,8 @@
Old Versions
------------
+* `lxml 2.0beta2`_, released 2008-01-26 (`changes for 2.0beta2`_)
+
* `lxml 2.0beta1`_, released 2008-01-11 (`changes for 2.0beta1`_)
* `lxml 2.0alpha6`_, released 2007-12-19 (`changes for 2.0alpha6`_)
@@ -274,6 +272,7 @@
* `lxml 0.5`_, released 2005-04-08
+.. _`lxml 2.0`: lxml-2.0.tgz
.. _`lxml 2.0beta2`: lxml-2.0beta2.tgz
.. _`lxml 2.0beta1`: lxml-2.0beta1.tgz
.. _`lxml 2.0alpha6`: lxml-2.0alpha6.tgz
@@ -307,6 +306,7 @@
.. _`lxml 0.5.1`: lxml-0.5.1.tgz
.. _`lxml 0.5`: lxml-0.5.tgz
+.. _`changes for 2.0`: changes-2.0.html
.. _`changes for 2.0beta2`: changes-2.0beta2.html
.. _`changes for 2.0beta1`: changes-2.0beta1.html
.. _`changes for 2.0alpha6`: changes-2.0alpha6.html
Modified: lxml/trunk/version.txt
==============================================================================
--- lxml/trunk/version.txt (original)
+++ lxml/trunk/version.txt Fri Feb 1 14:54:32 2008
@@ -1 +1 @@
-2.0beta2
+2.0
From scoder at codespeak.net Fri Feb 1 16:26:10 2008
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Fri, 1 Feb 2008 16:26:10 +0100 (CET)
Subject: [Lxml-checkins] r51188 - in lxml/trunk: . doc src/lxml
src/lxml/tests
Message-ID: <20080201152610.1CF221684C2@codespeak.net>
Author: scoder
Date: Fri Feb 1 16:26:09 2008
New Revision: 51188
Modified:
lxml/trunk/ (props changed)
lxml/trunk/CHANGES.txt
lxml/trunk/doc/lxml2.txt
lxml/trunk/doc/parsing.txt
lxml/trunk/doc/tutorial.txt
lxml/trunk/src/lxml/lxml.etree.pyx
lxml/trunk/src/lxml/serializer.pxi
lxml/trunk/src/lxml/tests/test_etree.py
Log:
r3394 at delle: sbehnel | 2008-02-01 15:57:58 +0100
deprecate et.tounicode() in favour of et.tostring(encoding=unicode)
Modified: lxml/trunk/CHANGES.txt
==============================================================================
--- lxml/trunk/CHANGES.txt (original)
+++ lxml/trunk/CHANGES.txt Fri Feb 1 16:26:09 2008
@@ -8,6 +8,10 @@
Features added
--------------
+* Passing the ``unicode`` type as ``encoding`` to ``tostring()`` will
+ serialise to unicode. The ``tounicode()`` function is now
+ deprecated.
+
* ``XMLSchema()`` and ``RelaxNG()`` can parse from StringIO.
* ``makeparser()`` function in ``lxml.objectify`` to create a new
Modified: lxml/trunk/doc/lxml2.txt
==============================================================================
--- lxml/trunk/doc/lxml2.txt (original)
+++ lxml/trunk/doc/lxml2.txt Fri Feb 1 16:26:09 2008
@@ -7,7 +7,7 @@
1 Changes in etree and objectify
1.1 Incompatible changes
1.2 Enhancements
- 1.3 Other changes
+ 1.3 Deprecated features
2 New modules
2.1 lxml.html
2.2 lxml.cssselect
@@ -136,8 +136,23 @@
path expression, e.g. by the ``string()`` function or extension
functions, will return None as their parent.
+* Parse time XML Schema validation is now supported by passing an
+ XMLSchema object to the ``schema`` keyword argument of a parser.
+
+* The parsers support a ``target`` object that implements
+ ElementTree's `TreeBuilder interface`_.
+
.. _`E factory`: objectify.html#tree-generation-with-the-e-factory
.. _`find the Element that carries it`: tutorial.html#using-xpath-to-find-text
+.. _`TreeBuilder interface`: http://effbot.org/elementtree/elementtree-treebuilder.htm
+
+
+Deprecated features
+-------------------
+
+The following features were deprecated and will be removed in lxml 2.1:
+
+* The ``tounicode()`` function was replaced by ``tostring(encoding=unicode)``.
New modules
Modified: lxml/trunk/doc/parsing.txt
==============================================================================
--- lxml/trunk/doc/parsing.txt (original)
+++ lxml/trunk/doc/parsing.txt Fri Feb 1 16:26:09 2008
@@ -488,9 +488,9 @@
Serialising to Unicode strings
------------------------------
-To serialize the result, you would normally use the ``tostring`` module
-function, which serializes to plain ASCII by default or a number of other
-encodings if asked for::
+To serialize the result, you would normally use the ``tostring()``
+module function, which serializes to plain ASCII by default or a
+number of other byte encodings if asked for::
>>> etree.tostring(root)
'<test> &#63697; + &#63698; </test>'
@@ -498,33 +498,36 @@
>>> etree.tostring(root, encoding='UTF-8', xml_declaration=False)
'<test> \xef\xa3\x91 + \xef\xa3\x92 </test>'
-As an extension, lxml.etree has a new ``tounicode()`` function that you can
-call on XML tree objects to retrieve a Python unicode representation::
+As an extension, lxml.etree recognises the unicode type as encoding to
+build a Python unicode representation of a tree::
- >>> etree.tounicode(root)
+ >>> etree.tostring(root, encoding=unicode)
u'<test> \uf8d1 + \uf8d2 </test>'
>>> el = etree.Element("test")
- >>> etree.tounicode(el)
+ >>> etree.tostring(el, encoding=unicode)
u'<test/>'
>>> subel = etree.SubElement(el, "subtest")
- >>> etree.tounicode(el)
+ >>> etree.tostring(el, encoding=unicode)
u'<test><subtest/></test>'
>>> tree = etree.ElementTree(el)
- >>> etree.tounicode(tree)
+ >>> etree.tostring(tree, encoding=unicode)
u'<test><subtest/></test>'
-The result of ``tounicode()`` can be treated like any other Python unicode
-string and then passed back into the parsers. However, if you want to save
-the result to a file or pass it over the network, you should use ``write()``
-or ``tostring()`` with an encoding argument (typically UTF-8) to serialize the
-XML. The main reason is that unicode strings returned by ``tounicode()``
-never have an XML declaration and therefore do not specify their encoding.
-These strings are most likely not parsable by other XML libraries.
-
-In contrast, the ``tostring()`` function automatically adds a declaration as
-needed that reflects the encoding of the returned string. This makes it
-possible for other parsers to correctly parse the XML byte stream. Note that
-using ``tostring()`` with UTF-8 is also considerably faster in most cases.
+The result of ``tostring(encoding=unicode)`` can be treated like any
+other Python unicode string and then passed back into the parsers.
+However, if you want to save the result to a file or pass it over the
+network, you should use ``write()`` or ``tostring()`` with a byte
+encoding (typically UTF-8) to serialize the XML. The main reason is
+that unicode strings returned by ``tostring(encoding=unicode)`` are
+not byte streams and they never have an XML declaration to specify
+their encoding. These strings are most likely not parsable by other
+XML libraries.
+
+For normal byte encodings, the ``tostring()`` function automatically
+adds a declaration as needed that reflects the encoding of the
+returned string. This makes it possible for other parsers to
+correctly parse the XML byte stream. Note that using ``tostring()``
+with UTF-8 is also considerably faster in most cases.
Modified: lxml/trunk/doc/tutorial.txt
==============================================================================
--- lxml/trunk/doc/tutorial.txt (original)
+++ lxml/trunk/doc/tutorial.txt Fri Feb 1 16:26:09 2008
@@ -486,9 +486,10 @@
>>> print etree.tostring(root, method='text')
HelloWorld
-For the plain text output, the ``tounicode()`` function might become handy::
+For the plain text output, serialising to a Python unicode string
+might become handy. Just pass the ``unicode`` type as encoding::
- >>> etree.tounicode(root, method='text')
+ >>> etree.tostring(root, encoding=unicode, method='text')
u'HelloWorld'
Modified: lxml/trunk/src/lxml/lxml.etree.pyx
==============================================================================
--- lxml/trunk/src/lxml/lxml.etree.pyx (original)
+++ lxml/trunk/src/lxml/lxml.etree.pyx Fri Feb 1 16:26:09 2008
@@ -13,6 +13,9 @@
except AttributeError:
from sets import Set as set
+cdef object _unicode
+_unicode = __builtin__.unicode
+
del __builtin__
cdef object os_path_join
@@ -2253,6 +2256,9 @@
'xml_declaration' (bool). Note that changing the encoding to a non UTF-8
compatible encoding will enable a declaration by default.
+ You can also serialise to a Unicode string without declaration by
+ passing the ``unicode`` function as encoding.
+
The keyword argument 'pretty_print' (bool) enables formatted XML.
The keyword argument 'method' selects the output method: 'xml',
@@ -2263,7 +2269,12 @@
on the tail text of children, which will always be serialised.
"""
cdef bint write_declaration
- if xml_declaration is None:
+ if encoding is _unicode:
+ if xml_declaration:
+ raise ValueError(
+ "Serialisation to unicode must not request an XML declaration")
+ write_declaration = 0
+ elif xml_declaration is None:
# by default, write an XML declaration only for non-standard encodings
write_declaration = encoding is not None and encoding.upper() not in \
('ASCII', 'UTF-8', 'UTF8', 'US-ASCII')
@@ -2309,6 +2320,8 @@
You can prevent the tail text of the element from being serialised
by passing the boolean ``with_tail`` option. This has no impact
on the tail text of children, which will always be serialised.
+
+ @deprecated: use ``tostring(el, encoding=unicode)`` instead.
"""
if isinstance(element_or_tree, _Element):
return _tounicode(<_Element>element_or_tree, method, 0, pretty_print,
Modified: lxml/trunk/src/lxml/serializer.pxi
==============================================================================
--- lxml/trunk/src/lxml/serializer.pxi (original)
+++ lxml/trunk/src/lxml/serializer.pxi Fri Feb 1 16:26:09 2008
@@ -57,6 +57,9 @@
return None
if encoding is None:
c_enc = NULL
+ elif encoding is _unicode:
+ return _tounicode(element, method, write_complete_document,
+ pretty_print, with_tail)
else:
encoding = _utf8(encoding)
c_enc = _cstr(encoding)
Modified: lxml/trunk/src/lxml/tests/test_etree.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_etree.py (original)
+++ lxml/trunk/src/lxml/tests/test_etree.py Fri Feb 1 16:26:09 2008
@@ -2021,6 +2021,75 @@
result = tounicode(a, pretty_print=True)
self.assertEquals(result, "<a>\n  <b/>\n  <c/>\n</a>\n")
+ def test_tostring_unicode(self):
+ tostring = self.etree.tostring
+ Element = self.etree.Element
+ SubElement = self.etree.SubElement
+
+ a = Element('a')
+ b = SubElement(a, 'b')
+ c = SubElement(a, 'c')
+
+ self.assert_(isinstance(tostring(a, encoding=unicode), unicode))
+ self.assertEquals('<a><b></b><c></c></a>',
+ canonicalize(tostring(a, encoding=unicode)))
+
+ def test_tostring_unicode_element(self):
+ tostring = self.etree.tostring
+ Element = self.etree.Element
+ SubElement = self.etree.SubElement
+
+ a = Element('a')
+ b = SubElement(a, 'b')
+ c = SubElement(a, 'c')
+ d = SubElement(c, 'd')
+ self.assert_(isinstance(tostring(b, encoding=unicode), unicode))
+ self.assert_(isinstance(tostring(c, encoding=unicode), unicode))
+ self.assertEquals('<b></b>',
+ canonicalize(tostring(b, encoding=unicode)))
+ self.assertEquals('<c><d></d></c>',
+ canonicalize(tostring(c, encoding=unicode)))
+
+ def test_tostring_unicode_none(self):
+ tostring = self.etree.tostring
+ self.assertRaises(TypeError, self.etree.tostring,
+ None, encoding=unicode)
+
+ def test_tostring_unicode_element_tail(self):
+ tostring = self.etree.tostring
+ Element = self.etree.Element
+ SubElement = self.etree.SubElement
+
+ a = Element('a')
+ b = SubElement(a, 'b')
+ c = SubElement(a, 'c')
+ d = SubElement(c, 'd')
+ b.tail = 'Foo'
+
+ self.assert_(isinstance(tostring(b, encoding=unicode), unicode))
+ self.assert_(tostring(b, encoding=unicode) == '<b/>Foo' or
+ tostring(b, encoding=unicode) == '<b />Foo')
+
+ def test_tostring_unicode_pretty(self):
+ tostring = self.etree.tostring
+ Element = self.etree.Element
+ SubElement = self.etree.SubElement
+
+ a = Element('a')
+ b = SubElement(a, 'b')
+ c = SubElement(a, 'c')
+
+ result = tostring(a, encoding=unicode)
+ self.assertEquals(result, "<a><b/><c/></a>")
+
+ result = tostring(a, encoding=unicode, pretty_print=False)
+ self.assertEquals(result, "<a><b/><c/></a>")
+
+ result = tostring(a, encoding=unicode, pretty_print=True)
+ self.assertEquals(result, "<a>\n  <b/>\n  <c/>\n</a>\n")
+
+ # helper methods
+
def _writeElement(self, element, encoding='us-ascii'):
"""Write out element for comparison.
"""
From scoder at codespeak.net Fri Feb 1 16:26:16 2008
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Fri, 1 Feb 2008 16:26:16 +0100 (CET)
Subject: [Lxml-checkins] r51189 - in lxml/trunk: . doc src/lxml
Message-ID: <20080201152616.755571684CC@codespeak.net>
Author: scoder
Date: Fri Feb 1 16:26:15 2008
New Revision: 51189
Modified:
lxml/trunk/ (props changed)
lxml/trunk/doc/lxml2.txt
lxml/trunk/src/lxml/xmlerror.pxi
Log:
r3395 at delle: sbehnel | 2008-02-01 16:25:37 +0100
cleanup in deprecated functions, say what will be removed in lxml 2.1
Modified: lxml/trunk/doc/lxml2.txt
==============================================================================
--- lxml/trunk/doc/lxml2.txt (original)
+++ lxml/trunk/doc/lxml2.txt Fri Feb 1 16:26:15 2008
@@ -7,11 +7,11 @@
1 Changes in etree and objectify
1.1 Incompatible changes
1.2 Enhancements
- 1.3 Deprecated features
+ 1.3 Deprecation
2 New modules
- 2.1 lxml.html
- 2.2 lxml.cssselect
- 2.3 lxml.doctestcompare
+ 2.1 lxml.usedoctest
+ 2.2 lxml.html
+ 2.3 lxml.cssselect
During the development of the lxml 1.x series, a couple of quirks were
@@ -147,12 +147,32 @@
.. _`TreeBuilder interface`: http://effbot.org/elementtree/elementtree-treebuilder.htm
-Deprecated features
--------------------
+Deprecation
+-----------
-The following features were deprecated and will be removed in lxml 2.1:
+The following functions and methods were deprecated and will be
+removed in lxml 2.1:
-* The ``tounicode()`` function was replaced by ``tostring(encoding=unicode)``.
+* The ``tounicode()`` function was replaced by the call
+ ``tostring(encoding=unicode)``.
+
+* CamelCaseNamed module functions were renamed to their underscore
+ equivalents to follow `PEP 8`_ in naming.
+
+ - ``etree.setDefaultParser()`` -> ``etree.set_default_parser()``
+
+ - ``etree.getDefaultParser()`` -> ``etree.get_default_parser()``
+
+ - ``etree.useGlobalPythonLog()`` -> ``etree.use_global_python_log()``
+
+ - ``XMLParser.setElementClassLookup()`` -> ``.set_element_class_lookup()``
+
+ - ``HTMLParser.setElementClassLookup()`` -> ``.set_element_class_lookup()``
+
+* The ``.getiterator()`` method on Elements and ElementTrees was
+ renamed to ``.iter()`` to follow ElementTree 1.3.
+
+.. _`PEP 8`: http://www.python.org/dev/peps/pep-0008/
New modules
Modified: lxml/trunk/src/lxml/xmlerror.pxi
==============================================================================
--- lxml/trunk/src/lxml/xmlerror.pxi (original)
+++ lxml/trunk/src/lxml/xmlerror.pxi Fri Feb 1 16:26:15 2008
@@ -4,9 +4,17 @@
# module level API functions
+def clear_error_log():
+ """Clear the global error log. Note that this log is already bound to a
+ fixed size.
+ """
+ __GLOBAL_ERROR_LOG.clear()
+
def clearErrorLog():
"""Clear the global error log. Note that this log is already bound to a
fixed size.
+
+ @deprecated: use ``clear_error_log()`` instead.
"""
__GLOBAL_ERROR_LOG.clear()
@@ -386,6 +394,17 @@
Note that this disables access to the global error log from exceptions.
Parsers, XSLT etc. will continue to provide their normal local error log.
+
+ @deprecated: use ``use_global_python_log()`` instead.
+ """
+ use_global_python_log(log)
+
+def use_global_python_log(PyErrorLog log not None):
+ """Replace the global error log by an etree.PyErrorLog that uses the
+ standard Python logging package.
+
+ Note that this disables access to the global error log from exceptions.
+ Parsers, XSLT etc. will continue to provide their normal local error log.
"""
global __GLOBAL_ERROR_LOG
__GLOBAL_ERROR_LOG = log
From scoder at codespeak.net Fri Feb 1 18:25:30 2008
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Fri, 1 Feb 2008 18:25:30 +0100 (CET)
Subject: [Lxml-checkins] r51190 - in lxml/trunk: . doc
Message-ID: <20080201172530.6661C1684FE@codespeak.net>
Author: scoder
Date: Fri Feb 1 18:25:28 2008
New Revision: 51190
Modified:
lxml/trunk/ (props changed)
lxml/trunk/doc/lxml2.txt
Log:
r3398 at delle: sbehnel | 2008-02-01 18:24:58 +0100
more on: what's new
Modified: lxml/trunk/doc/lxml2.txt
==============================================================================
--- lxml/trunk/doc/lxml2.txt (original)
+++ lxml/trunk/doc/lxml2.txt Fri Feb 1 18:25:28 2008
@@ -70,6 +70,13 @@
with cElementTree, which also enforces keyword-only arguments in a
couple of places.
+* XML tag names are validated when creating an Element. This does not
+ apply to HTML tags, where only HTML special characters are
+ forbidden. The distinction is made by the ``SubElement()`` factory,
+ which tests if the tree it works on is an HTML tree, and by the
+ ``.makeelement()`` methods of parsers, which behave differently for
+ the ``XMLParser()`` and the ``HTMLParser()``.
+
* XPath now raises exceptions specific to the part of the execution that
failed: ``XPathSyntaxError`` for parser errors and ``XPathEvalError`` for
errors that occurred during the evaluation. Note that the distinction only
@@ -136,11 +143,25 @@
path expression, e.g. by the ``string()`` function or extension
functions, will return None as their parent.
-* Parse time XML Schema validation is now supported by passing an
+* Setting a ``QName`` object as value of the ``.text`` property or as
+ an attribute value will resolve its prefix in the respective context.
+
+* Following ElementTree 1.3, the ``iterfind()`` method supports
+ efficient iteration based on XPath-like expressions.
+
+The parsers also received some major enhancements:
+
+* ``iterparse()`` can parse HTML when passing the boolean ``html``
+ keyword.
+
+* Parse time XML Schema validation by passing an
XMLSchema object to the ``schema`` keyword argument of a parser.
-* The parsers support a ``target`` object that implements
- ElementTree's `TreeBuilder interface`_.
+* Support for a ``target`` object that implements ElementTree's
+ `TreeBuilder interface`_.
+
+* The ``encoding`` keyword allows overriding the document encoding.
+
.. _`E factory`: objectify.html#tree-generation-with-the-e-factory
.. _`find the Element that carries it`: tutorial.html#using-xpath-to-find-text
@@ -150,14 +171,14 @@
Deprecation
-----------
-The following functions and methods were deprecated and will be
-removed in lxml 2.1:
+The following functions and methods are now deprecated. They are
+still available in lxml 2.0 and will be removed in lxml 2.1:
* The ``tounicode()`` function was replaced by the call
``tostring(encoding=unicode)``.
-* CamelCaseNamed module functions were renamed to their underscore
- equivalents to follow `PEP 8`_ in naming.
+* CamelCaseNamed module functions and methods were renamed to their
+ underscore equivalents to follow `PEP 8`_ in naming.
- ``etree.setDefaultParser()`` -> ``etree.set_default_parser()``
From scoder at codespeak.net Fri Feb 1 19:01:30 2008
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Fri, 1 Feb 2008 19:01:30 +0100 (CET)
Subject: [Lxml-checkins] r51191 - lxml/trunk
Message-ID: <20080201180130.7EB7E1684ED@codespeak.net>
Author: scoder
Date: Fri Feb 1 19:01:29 2008
New Revision: 51191
Modified:
lxml/trunk/ (props changed)
lxml/trunk/MANIFEST.in
Log:
r3400 at delle: sbehnel | 2008-02-01 19:01:03 +0100
removed reference to non-existing file
Modified: lxml/trunk/MANIFEST.in
==============================================================================
--- lxml/trunk/MANIFEST.in (original)
+++ lxml/trunk/MANIFEST.in Fri Feb 1 19:01:29 2008
@@ -12,4 +12,4 @@
recursive-include doc *.txt *.html *.css *.xml *.mgp pubkey.asc tagpython.png
recursive-include fake_pyrex *.py
include doc/mkhtml.py doc/rest2html.py
-exclude doc/pyrex.txt src/lxml/etree.pxi
+exclude doc/pyrex.txt
From scoder at codespeak.net Fri Feb 1 19:03:58 2008
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Fri, 1 Feb 2008 19:03:58 +0100 (CET)
Subject: [Lxml-checkins] r51192 - lxml/tag/lxml-2.0
Message-ID: <20080201180358.7564E1684EF@codespeak.net>
Author: scoder
Date: Fri Feb 1 19:03:57 2008
New Revision: 51192
Added:
lxml/tag/lxml-2.0/
- copied from r51191, lxml/trunk/
Log:
tag for lxml 2.0
From scoder at codespeak.net Fri Feb 1 19:33:33 2008
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Fri, 1 Feb 2008 19:33:33 +0100 (CET)
Subject: [Lxml-checkins] r51193 - lxml/branch/lxml-2.0
Message-ID: <20080201183333.635571684DB@codespeak.net>
Author: scoder
Date: Fri Feb 1 19:33:32 2008
New Revision: 51193
Added:
lxml/branch/lxml-2.0/
- copied from r51192, lxml/tag/lxml-2.0/
Log:
new branch for lxml 2.0 series
From scoder at codespeak.net Sun Feb 3 21:00:57 2008
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Sun, 3 Feb 2008 21:00:57 +0100 (CET)
Subject: [Lxml-checkins] r51232 - in lxml/trunk: . doc
Message-ID: <20080203200057.52C011683D1@codespeak.net>
Author: scoder
Date: Sun Feb 3 21:00:56 2008
New Revision: 51232
Modified:
lxml/trunk/ (props changed)
lxml/trunk/doc/build.txt
Log:
r3405 at delle: sbehnel | 2008-02-02 16:32:10 +0100
doc clarification
Modified: lxml/trunk/doc/build.txt
==============================================================================
--- lxml/trunk/doc/build.txt (original)
+++ lxml/trunk/doc/build.txt Sun Feb 3 21:00:56 2008
@@ -165,10 +165,11 @@
an older version. The result can be segfaults on this platform that are hard
to track down.
-To make sure the newer libxml2 and libxslt versions are used (e.g. under
-fink), you should add the directory where you installed the libraries to the
-``DYLD_LIBRARY_PATH`` environment variable. This seems to fix a lot of
-problems for users.
+To make sure the newer libxml2 and libxslt versions are used
+(e.g. those provided by fink or macports), you should add the
+directory where you installed the libraries to the
+``DYLD_LIBRARY_PATH`` environment variable *at runtime* (i.e. not just
+at build time). This seems to fix a lot of problems for users.
Alternatively, you can build lxml statically. A way to do this on MS Windows
is described in the next section, but it should be easy to adapt it for
From lxml-checkins at codespeak.net Sun Feb 3 21:31:52 2008
From: lxml-checkins at codespeak.net (lxml-checkins at codespeak.net)
Date: Sun, 3 Feb 2008 21:31:52 +0100 (CET)
Subject: [Lxml-checkins] February 50% OFF
Message-ID: <20080203143126.4928.qmail@ppp85-140-54-38.pppoe.mtu-net.ru>
An HTML attachment was scrubbed...
URL: http://codespeak.net/pipermail/lxml-checkins/attachments/20080203/29cc36dd/attachment.htm
From lxml-checkins at codespeak.net Mon Feb 4 10:18:21 2008
From: lxml-checkins at codespeak.net (lxml-checkins at codespeak.net)
Date: Mon, 4 Feb 2008 10:18:21 +0100 (CET)
Subject: [Lxml-checkins] January 72% OFF
Message-ID: <20080204131810.8322.qmail@dsl88.241-13866.ttnet.net.tr>
An HTML attachment was scrubbed...
URL: http://codespeak.net/pipermail/lxml-checkins/attachments/20080204/f33aed8b/attachment.htm
From scoder at codespeak.net Mon Feb 4 19:35:14 2008
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Mon, 4 Feb 2008 19:35:14 +0100 (CET)
Subject: [Lxml-checkins] r51261 - in lxml/trunk: . src/lxml/tests
Message-ID: <20080204183514.441C91684F0@codespeak.net>
Author: scoder
Date: Mon Feb 4 19:35:12 2008
New Revision: 51261
Modified:
lxml/trunk/ (props changed)
lxml/trunk/src/lxml/tests/test_elementtree.py
Log:
r3407 at delle: sbehnel | 2008-02-04 19:33:49 +0100
disable cET tests for the current cET version
Modified: lxml/trunk/src/lxml/tests/test_elementtree.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_elementtree.py (original)
+++ lxml/trunk/src/lxml/tests/test_elementtree.py Mon Feb 4 19:35:12 2008
@@ -16,7 +16,7 @@
if cElementTree is not None:
if tuple([int(n) for n in
- getattr(cElementTree, "VERSION", "0.0").split(".")]) <= (1,0,6):
+ getattr(cElementTree, "VERSION", "0.0").split(".")]) <= (1,0,7):
cElementTree = None
try:
From scoder at codespeak.net Mon Feb 4 19:35:17 2008
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Mon, 4 Feb 2008 19:35:17 +0100 (CET)
Subject: [Lxml-checkins] r51262 - in lxml/trunk: . doc
Message-ID: <20080204183517.BB19F168507@codespeak.net>
Author: scoder
Date: Mon Feb 4 19:35:16 2008
New Revision: 51262
Modified:
lxml/trunk/ (props changed)
lxml/trunk/doc/build.txt
Log:
r3408 at delle: sbehnel | 2008-02-04 19:34:29 +0100
doc clarification: Cython should not be used for a regular build
Modified: lxml/trunk/doc/build.txt
==============================================================================
--- lxml/trunk/doc/build.txt (original)
+++ lxml/trunk/doc/build.txt Mon Feb 4 19:35:16 2008
@@ -23,22 +23,31 @@
Cython
------
-The lxml.etree and lxml.objectify modules are written in Cython_. Since we
-distribute the Cython-generated .c files with lxml releases, however, you do
-not need Cython to build lxml from the normal release sources.
-
+.. _EasyInstall: http://peak.telecommunity.com/DevCenter/EasyInstall
.. _Cython: http://www.cython.org
-If you are interested in building lxml from a Subversion checkout or want to
-be an lxml developer, you do need a working Cython installation. You can use
-EasyInstall_ to install it::
+The lxml.etree and lxml.objectify modules are written in Cython_.
+Since we distribute the Cython-generated .c files with lxml releases,
+however, you do not need Cython to build lxml from the normal release
+sources. We even encourage you to *not install Cython* for a normal
+release build, as the generated C code can vary quite heavily between
+Cython versions, which may or may not generate correct code for lxml.
+The pre-generated release sources were tested and therefore are known
+to work.
+
+So, if you want a reliable build of lxml, we suggest that you a) use a
+source release of lxml and b) disable or uninstall Cython for the
+build.
+
+*Only* if you are interested in building lxml from a Subversion
+checkout (e.g. to test a bug fix that has not been released yet) or if
+you want to be an lxml developer, then you do need a working Cython
+installation. You can use EasyInstall_ to install it::
- easy_install Cython==0.9.6.11
-
-.. _EasyInstall: http://peak.telecommunity.com/DevCenter/EasyInstall
+ easy_install Cython==0.9.6.11b
-lxml currently requires at least Cython 0.9.6.11, but later versions
-should work.
+lxml currently requires Cython 0.9.6.11b; later versions were not
+tested.
Subversion
From scoder at codespeak.net Tue Feb 5 20:23:47 2008
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Tue, 5 Feb 2008 20:23:47 +0100 (CET)
Subject: [Lxml-checkins] r51305 - in lxml/trunk: . doc
Message-ID: <20080205192347.B2C081683D7@codespeak.net>
Author: scoder
Date: Tue Feb 5 20:23:44 2008
New Revision: 51305
Modified:
lxml/trunk/ (props changed)
lxml/trunk/doc/build.txt
Log:
r3411 at delle: sbehnel | 2008-02-05 10:26:58 +0100
link to MacOS-X thread
Modified: lxml/trunk/doc/build.txt
==============================================================================
--- lxml/trunk/doc/build.txt (original)
+++ lxml/trunk/doc/build.txt Tue Feb 5 20:23:44 2008
@@ -180,10 +180,10 @@
``DYLD_LIBRARY_PATH`` environment variable *at runtime* (i.e. not just
at build time). This seems to fix a lot of problems for users.
-Alternatively, you can build lxml statically. A way to do this on MS Windows
-is described in the next section, but it should be easy to adapt it for
-Mac-OS. That way, you can always be sure you use the versions you compiled
-lxml with, regardless of the runtime environement.
+Please read this thread about `experiences with MacOS-X`_ if you
+encounter problems.
+
+.. _`experiences with MacOS-X`: http://thread.gmane.org/gmane.comp.python.lxml.devel/3290/focus=3290
Static linking on Windows
From scoder at codespeak.net Tue Feb 5 20:23:53 2008
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Tue, 5 Feb 2008 20:23:53 +0100 (CET)
Subject: [Lxml-checkins] r51306 - in lxml/trunk: . doc
Message-ID: <20080205192353.7677B1683E1@codespeak.net>
Author: scoder
Date: Tue Feb 5 20:23:52 2008
New Revision: 51306
Modified:
lxml/trunk/ (props changed)
lxml/trunk/doc/build.txt
Log:
r3412 at delle: sbehnel | 2008-02-05 11:25:19 +0100
MacOS doc clarifications
Modified: lxml/trunk/doc/build.txt
==============================================================================
--- lxml/trunk/doc/build.txt (original)
+++ lxml/trunk/doc/build.txt Tue Feb 5 20:23:52 2008
@@ -174,11 +174,17 @@
an older version. The result can be segfaults on this platform that are hard
to track down.
-To make sure the newer libxml2 and libxslt versions are used
-(e.g. those provided by fink or macports), you should add the
-directory where you installed the libraries to the
-``DYLD_LIBRARY_PATH`` environment variable *at runtime* (i.e. not just
-at build time). This seems to fix a lot of problems for users.
+To make sure the newer libxml2 and libxslt versions (e.g. those
+provided by fink or macports) are used at *build time*, you must take
+care that the script ``xslt-config`` is found from the newly installed
+version when running the build setup. The system libraries also
+provide this script, but the new one must come first in the PATH.
+
+To make sure the newer libxml2 and libxslt versions are used at
+*runtime*, you should add *all* directories where the newer libraries
+are installed (i.e. libxml2, libxslt and libexslt) to the
+``DYLD_LIBRARY_PATH`` environment variable when you use lxml (i.e. not
+only at build time). This seems to fix a lot of problems for users.
Please read this thread about `experiences with MacOS-X`_ if you
encounter problems.
From scoder at codespeak.net Tue Feb 5 20:23:59 2008
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Tue, 5 Feb 2008 20:23:59 +0100 (CET)
Subject: [Lxml-checkins] r51307 - in lxml/trunk: . doc
Message-ID: <20080205192359.1413516844A@codespeak.net>
Author: scoder
Date: Tue Feb 5 20:23:58 2008
New Revision: 51307
Modified:
lxml/trunk/ (props changed)
lxml/trunk/doc/build.txt
Log:
r3413 at delle: sbehnel | 2008-02-05 17:32:39 +0100
MacOS doc clarifications
Modified: lxml/trunk/doc/build.txt
==============================================================================
--- lxml/trunk/doc/build.txt (original)
+++ lxml/trunk/doc/build.txt Tue Feb 5 20:23:58 2008
@@ -187,9 +187,11 @@
only at build time). This seems to fix a lot of problems for users.
Please read this thread about `experiences with MacOS-X`_ if you
-encounter problems.
+encounter problems. It also has a `buildout for lxml`_ that you can
+use.
.. _`experiences with MacOS-X`: http://thread.gmane.org/gmane.comp.python.lxml.devel/3290/focus=3290
+.. _`buildout for lxml`: http://thread.gmane.org/gmane.comp.python.lxml.devel/3290/focus=3297
Static linking on Windows
From scoder at codespeak.net Thu Feb 7 08:17:50 2008
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Thu, 7 Feb 2008 08:17:50 +0100 (CET)
Subject: [Lxml-checkins] r51315 - lxml/trunk
Message-ID: <20080207071750.09D341684D7@codespeak.net>
Author: scoder
Date: Thu Feb 7 08:17:48 2008
New Revision: 51315
Modified:
lxml/trunk/ (props changed)
lxml/trunk/setup.py
Log:
r3417 at delle: sbehnel | 2008-02-06 10:41:34 +0100
link to project homepage from PyPI
Modified: lxml/trunk/setup.py
==============================================================================
--- lxml/trunk/setup.py (original)
+++ lxml/trunk/setup.py Thu Feb 7 08:17:48 2008
@@ -76,7 +76,8 @@
It extends the ElementTree API significantly to offer support for XPath,
RelaxNG, XML Schema, XSLT, C14N and much more.
-To contact the project, go to the project home page or see our bug tracker at
+To contact the project, go to the `project home page
+`_ or see our bug tracker at
https://launchpad.net/lxml
In case you want to use the current in-development version of lxml, you can
From scoder at codespeak.net Thu Feb 7 08:17:54 2008
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Thu, 7 Feb 2008 08:17:54 +0100 (CET)
Subject: [Lxml-checkins] r51316 - in lxml/trunk: . doc
Message-ID: <20080207071754.AB19D1684D8@codespeak.net>
Author: scoder
Date: Thu Feb 7 08:17:53 2008
New Revision: 51316
Modified:
lxml/trunk/ (props changed)
lxml/trunk/doc/FAQ.txt
Log:
r3418 at delle: sbehnel | 2008-02-06 10:48:03 +0100
link to Enfold Proxy 4
Modified: lxml/trunk/doc/FAQ.txt
==============================================================================
--- lxml/trunk/doc/FAQ.txt (original)
+++ lxml/trunk/doc/FAQ.txt Thu Feb 7 08:17:53 2008
@@ -120,6 +120,7 @@
* cssutils_, a CSS parser and toolkit, can be used with ``lxml.cssselect``
* Deliverance_, a content theming tool
+* `Enfold Proxy 4`_, a web server accelerator with on-the-fly XSLT processing
* Inteproxy_, a secure HTTP proxy
* lwebstring_, an XML template engine
* OpenXMLlib_, a library for handling OpenXML document meta data
@@ -136,6 +137,7 @@
.. _cssutils: http://code.google.com/p/cssutils/source/browse/trunk/examples/style.py?r=917
.. _Deliverance: http://www.openplans.org/projects/deliverance/project-home
+.. _`Enfold Proxy 4`: http://www.enfoldsystems.com/Products/Proxy/4
.. _gocept.lxml: http://pypi.python.org/pypi/gocept.lxml
.. _Inteproxy: http://lists.wald.intevation.org/pipermail/inteproxy-devel/2007-February/000000.html
.. _lwebstring: http://pypi.python.org/pypi/lwebstring
From scoder at codespeak.net Thu Feb 7 08:17:58 2008
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Thu, 7 Feb 2008 08:17:58 +0100 (CET)
Subject: [Lxml-checkins] r51317 - in lxml/trunk: . doc
Message-ID: <20080207071758.EB0091684E6@codespeak.net>
Author: scoder
Date: Thu Feb 7 08:17:58 2008
New Revision: 51317
Modified:
lxml/trunk/ (props changed)
lxml/trunk/doc/FAQ.txt
Log:
r3419 at delle: sbehnel | 2008-02-06 17:32:58 +0100
doc beautification
Modified: lxml/trunk/doc/FAQ.txt
==============================================================================
--- lxml/trunk/doc/FAQ.txt (original)
+++ lxml/trunk/doc/FAQ.txt Thu Feb 7 08:17:58 2008
@@ -116,7 +116,7 @@
Also note that the compatibility to the ElementTree library does not
require projects to set a hard dependency on lxml - as long as they do
-not need lxml's enhanced feature set.
+not take advantage of lxml's enhanced feature set.
* cssutils_, a CSS parser and toolkit, can be used with ``lxml.cssselect``
* Deliverance_, a content theming tool
From scoder at codespeak.net Thu Feb 7 08:18:03 2008
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Thu, 7 Feb 2008 08:18:03 +0100 (CET)
Subject: [Lxml-checkins] r51318 - in lxml/trunk: . src/lxml/tests
Message-ID: <20080207071803.49E961684D8@codespeak.net>
Author: scoder
Date: Thu Feb 7 08:18:02 2008
New Revision: 51318
Modified:
lxml/trunk/ (props changed)
lxml/trunk/src/lxml/tests/common_imports.py
Log:
r3420 at delle: sbehnel | 2008-02-07 08:17:17 +0100
fix ET version comparison
Modified: lxml/trunk/src/lxml/tests/common_imports.py
==============================================================================
--- lxml/trunk/src/lxml/tests/common_imports.py (original)
+++ lxml/trunk/src/lxml/tests/common_imports.py Thu Feb 7 08:18:02 2008
@@ -5,6 +5,15 @@
from lxml import etree
+def make_version_tuple(version_string):
+ l = []
+ for part in re.findall('([0-9]+|[^0-9.]+)', version_string):
+ try:
+ l.append(int(part))
+ except ValueError:
+ l.append(part)
+ return tuple(l)
+
try:
from elementtree import ElementTree # standard ET
except ImportError:
@@ -14,7 +23,7 @@
ElementTree = None
if hasattr(ElementTree, 'VERSION'):
- if tuple(ElementTree.VERSION.split('.')) < (1,3):
+ if make_version_tuple(ElementTree.VERSION)[:2] < (1,3):
# compatibility tests require ET 1.3+
ElementTree = None
@@ -27,8 +36,8 @@
cElementTree = None
if hasattr(cElementTree, 'VERSION'):
- if tuple(cElementTree.VERSION.split('.')) < (1,0,7):
- # compatibility tests require cET 1.0.7+
+ if make_version_tuple(cElementTree.VERSION)[:2] <= (1,0):
+ # compatibility tests do not run with cET 1.0.7
cElementTree = None
try:
From scoder at codespeak.net Fri Feb 8 09:12:30 2008
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Fri, 8 Feb 2008 09:12:30 +0100 (CET)
Subject: [Lxml-checkins] r51326 - in lxml/trunk: . src/lxml
Message-ID: <20080208081230.724991684C3@codespeak.net>
Author: scoder
Date: Fri Feb 8 09:12:28 2008
New Revision: 51326
Modified:
lxml/trunk/ (props changed)
lxml/trunk/src/lxml/lxml.etree.pyx
Log:
r3425 at delle: sbehnel | 2008-02-07 19:11:24 +0100
removed left-over method
Modified: lxml/trunk/src/lxml/lxml.etree.pyx
==============================================================================
--- lxml/trunk/src/lxml/lxml.etree.pyx (original)
+++ lxml/trunk/src/lxml/lxml.etree.pyx Fri Feb 8 09:12:28 2008
@@ -1271,9 +1271,6 @@
def __setitem__(self, index, value):
self._raiseImmutable()
- def __setslice__(self, start, end, value):
- self._raiseImmutable()
-
property attrib:
def __get__(self):
return {}
From scoder at codespeak.net Fri Feb 8 09:12:34 2008
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Fri, 8 Feb 2008 09:12:34 +0100 (CET)
Subject: [Lxml-checkins] r51327 - in lxml/trunk: . src/lxml/tests
Message-ID: <20080208081234.290541684CF@codespeak.net>
Author: scoder
Date: Fri Feb 8 09:12:33 2008
New Revision: 51327
Modified:
lxml/trunk/ (props changed)
lxml/trunk/src/lxml/tests/test_elementtree.py
Log:
r3426 at delle: sbehnel | 2008-02-08 09:10:11 +0100
new test case for item assignment with namespaces
Modified: lxml/trunk/src/lxml/tests/test_elementtree.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_elementtree.py (original)
+++ lxml/trunk/src/lxml/tests/test_elementtree.py Fri Feb 8 09:12:33 2008
@@ -2005,6 +2005,28 @@
self.assertEquals(
[d, c, b],
list(a))
+
+ def test_setslice_all_replace_reversed_ns(self):
+ Element = self.etree.Element
+ SubElement = self.etree.SubElement
+
+ a = Element('{ns}a')
+ b = SubElement(a, '{ns}b', {'{ns1}a1': 'test'})
+ c = SubElement(a, '{ns}c', {'{ns2}a2': 'test'})
+ d = SubElement(a, '{ns}d', {'{ns3}a3': 'test'})
+
+ s = [d, c, b]
+ a[:] = s
+ self.assertEquals(
+ [d, c, b],
+ list(a))
+ self.assertEquals(
+ ['{ns}d', '{ns}c', '{ns}b'],
+ [ child.tag for child in a ])
+
+ self.assertEquals(
+ [['{ns3}a3'], ['{ns2}a2'], ['{ns1}a1']],
+ [ child.attrib.keys() for child in a ])
def test_setslice_end(self):
Element = self.etree.Element
From scoder at codespeak.net Fri Feb 8 09:12:38 2008
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Fri, 8 Feb 2008 09:12:38 +0100 (CET)
Subject: [Lxml-checkins] r51328 - in lxml/trunk: . src/lxml
Message-ID: <20080208081238.C868C1684DE@codespeak.net>
Author: scoder
Date: Fri Feb 8 09:12:38 2008
New Revision: 51328
Modified:
lxml/trunk/ (props changed)
lxml/trunk/src/lxml/lxml.objectify.pyx
Log:
r3427 at delle: sbehnel | 2008-02-08 09:11:45 +0100
rewrite of OE.__setitem__() in objectify: copy all elements *before* setting the slice
Modified: lxml/trunk/src/lxml/lxml.objectify.pyx
==============================================================================
--- lxml/trunk/src/lxml/lxml.objectify.pyx (original)
+++ lxml/trunk/src/lxml/lxml.objectify.pyx Fri Feb 8 09:12:38 2008
@@ -269,7 +269,8 @@
def __setitem__(self, key, value):
"""Set the value of a sibling, counting from the first child of the
- parent.
+ parent. Implements key assignment, item assignment and slice
+ assignment.
* If argument is an integer, sets the sibling at that position.
@@ -280,12 +281,7 @@
items to the siblings.
"""
cdef _Element element
- cdef _Element parent
- cdef _Element new_element
- cdef tree.xmlNode* c_self_node
- cdef tree.xmlNode* c_parent
cdef tree.xmlNode* c_node
- cdef Py_ssize_t start, stop, step, slicelength
if python._isString(key):
key = _buildChildTag(self, key)
element = _lookupChild(self, key)
@@ -295,48 +291,21 @@
_replaceElement(element, value)
return
- c_self_node = self._c_node
- c_parent = c_self_node.parent
- if c_parent is NULL:
+ if self._c_node.parent is NULL:
# the 'root[i] = ...' case
raise TypeError("assignment to root element is invalid")
if python.PySlice_Check(key):
# slice assignment
- python.PySlice_GetIndicesEx(
- key, _countSiblings(self._c_node),
- &start, &stop, &step, &slicelength)
- # replace existing items
- new_items = iter(value)
- if step < 0:
- del_items = list(self)[start:stop:step]
- else:
- del_items = list(islice(self, start, stop, step))
- del_items = iter(del_items)
- parent = self.getparent()
- try:
- for el in del_items:
- item = new_items.next()
- _replaceElement(el, item)
- except StopIteration:
- remove = parent.remove
- remove(el)
- for el in del_items:
- remove(el)
- return
- else:
- # append remaining new items
- tag = self.tag
- for item in new_items:
- _appendValue(parent, tag, item)
+ _setSlice(key, self, value)
else:
# normal index assignment
if key < 0:
- c_node = c_parent.last
+ c_node = self._c_node.parent.last
else:
- c_node = c_parent.children
+ c_node = self._c_node.parent.children
c_node = _findFollowingSibling(
- c_node, tree._getNs(c_self_node), c_self_node.name, key)
+ c_node, tree._getNs(self._c_node), self._c_node.name, key)
if c_node is NULL:
raise IndexError(key)
element = elementFactory(self._doc, c_node)
@@ -538,6 +507,45 @@
PYTYPE_ATTRIBUTE_NAME)
cetree.setNodeText(element._c_node, value)
+cdef _setSlice(slice, _Element target, items):
+ cdef _Element parent
+ # collect new values
+ new_items = []
+ tag = target.tag
+ for item in items:
+ if isinstance(item, _Element):
+ # deep copy the new element
+ new_element = cetree.deepcopyNodeToDocument(
+ target._doc, (<_Element>item)._c_node)
+ new_element.tag = tag
+ python.PyList_Append(new_items, new_element)
+ else:
+ new_element = cetree.makeElement(
+ tag, target._doc, None, None, None, None, None)
+ _setElementValue(new_element, item)
+ python.PyList_Append(new_items, new_element)
+
+ # replace existing items
+ new_items = iter(new_items)
+ del_items = iter(target[slice])
+ parent = target.getparent()
+ try:
+ next_item = new_items.next
+ replace = parent.replace
+ for el in del_items:
+ item = next_item()
+ replace(el, item)
+ except StopIteration:
+ remove = parent.remove
+ remove(el)
+ for el in del_items:
+ remove(el)
+ return
+ else:
+ # append remaining new items
+ for item in new_items:
+ _appendValue(parent, tag, item)
+
################################################################################
# Data type support in subclasses
From scoder at codespeak.net Fri Feb 8 15:32:29 2008
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Fri, 8 Feb 2008 15:32:29 +0100 (CET)
Subject: [Lxml-checkins] r51335 - in lxml/trunk: . src/lxml/tests
Message-ID: <20080208143229.47183168534@codespeak.net>
Author: scoder
Date: Fri Feb 8 15:32:26 2008
New Revision: 51335
Modified:
lxml/trunk/ (props changed)
lxml/trunk/src/lxml/tests/test_objectify.py
Log:
r3431 at delle: sbehnel | 2008-02-08 15:31:54 +0100
slicing tests for objectify
Modified: lxml/trunk/src/lxml/tests/test_objectify.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_objectify.py (original)
+++ lxml/trunk/src/lxml/tests/test_objectify.py Fri Feb 8 15:32:26 2008
@@ -427,7 +427,60 @@
self.assertRaises(TypeError, setattr, root.c1.c2, 'text', "test")
self.assertRaises(TypeError, setattr, root.c1.c2, 'pyval', "test")
- def test_setslice(self):
+ # slicing
+
+ def test_getslice_complete(self):
+ root = self.XML("c1c2")
+ self.assertEquals(["c1", "c2"],
+ [ c.text for c in root.c[:] ])
+
+ def test_getslice_partial(self):
+ root = self.XML("c1c2c3c4")
+ test_list = ["c1", "c2", "c3", "c4"]
+
+ self.assertEquals(test_list,
+ [ c.text for c in root.c[:] ])
+ self.assertEquals(test_list[1:2],
+ [ c.text for c in root.c[1:2] ])
+ self.assertEquals(test_list[-3:-1],
+ [ c.text for c in root.c[-3:-1] ])
+ self.assertEquals(test_list[-3:3],
+ [ c.text for c in root.c[-3:3] ])
+ self.assertEquals(test_list[-3000:3],
+ [ c.text for c in root.c[-3000:3] ])
+ self.assertEquals(test_list[-3:3000],
+ [ c.text for c in root.c[-3:3000] ])
+
+ def test_getslice_partial_neg(self):
+ root = self.XML("c1c2c3c4")
+ test_list = ["c1", "c2", "c3", "c4"]
+
+ self.assertEquals(test_list,
+ [ c.text for c in root.c[:] ])
+ self.assertEquals(test_list[2:1:-1],
+ [ c.text for c in root.c[2:1:-1] ])
+ self.assertEquals(test_list[-1:-3:-1],
+ [ c.text for c in root.c[-1:-3:-1] ])
+ self.assertEquals(test_list[2:-3:-1],
+ [ c.text for c in root.c[2:-3:-1] ])
+ self.assertEquals(test_list[2:-3000:-1],
+ [ c.text for c in root.c[2:-3000:-1] ])
+
+
+ def test_setslice_complete(self):
+ Element = self.Element
+ SubElement = self.etree.SubElement
+ root = Element("root")
+ root.c = ["c1", "c2"]
+
+ c1 = root.c[0]
+ c2 = root.c[1]
+
+ self.assertEquals([c1,c2], list(root.c))
+ self.assertEquals(["c1", "c2"],
+ [ c.text for c in root.c ])
+
+ def test_setslice_elements(self):
Element = self.Element
SubElement = self.etree.SubElement
root = Element("root")
@@ -455,6 +508,52 @@
self.assertEquals(["c1", "c2", "c2", "c1"],
[ c.text for c in root.c ])
+ def test_setslice_partial(self):
+ Element = self.Element
+ SubElement = self.etree.SubElement
+ root = Element("root")
+ l = ["c1", "c2", "c3", "c4"]
+ root.c = l
+
+ self.assertEquals(["c1", "c2", "c3", "c4"],
+ [ c.text for c in root.c ])
+ self.assertEquals(l,
+ [ c.text for c in root.c ])
+
+ new_slice = ["cA", "cB"]
+ l[1:2] = new_slice
+ root.c[1:2] = new_slice
+
+ self.assertEquals(["c1", "cA", "cB", "c3", "c4"], l)
+ self.assertEquals(["c1", "cA", "cB", "c3", "c4"],
+ [ c.text for c in root.c ])
+ self.assertEquals(l,
+ [ c.text for c in root.c ])
+
+ def test_setslice_partial_neg(self):
+ Element = self.Element
+ SubElement = self.etree.SubElement
+ root = Element("root")
+ l = ["c1", "c2", "c3", "c4"]
+ root.c = l
+
+ self.assertEquals(["c1", "c2", "c3", "c4"],
+ [ c.text for c in root.c ])
+ self.assertEquals(l,
+ [ c.text for c in root.c ])
+
+ new_slice = ["cA", "cB"]
+ l[-1:1:-1] = new_slice
+ root.c[-1:1:-1] = new_slice
+
+ self.assertEquals(["c1", "c2", "cB", "cA", "c4"], l)
+ self.assertEquals(["c1", "c2", "cB", "cA", "c4"],
+ [ c.text for c in root.c ])
+ self.assertEquals(l,
+ [ c.text for c in root.c ])
+
+ # other stuff
+
def test_set_string(self):
# make sure strings are not handled as sequences
Element = self.Element
From scoder at codespeak.net Fri Feb 8 15:39:50 2008
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Fri, 8 Feb 2008 15:39:50 +0100 (CET)
Subject: [Lxml-checkins] r51336 - in lxml/trunk: . src/lxml/tests
Message-ID: <20080208143950.08E31168534@codespeak.net>
Author: scoder
Date: Fri Feb 8 15:39:50 2008
New Revision: 51336
Modified:
lxml/trunk/ (props changed)
lxml/trunk/src/lxml/tests/test_objectify.py
Log:
r3433 at delle: sbehnel | 2008-02-08 15:38:06 +0100
comment
Modified: lxml/trunk/src/lxml/tests/test_objectify.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_objectify.py (original)
+++ lxml/trunk/src/lxml/tests/test_objectify.py Fri Feb 8 15:39:50 2008
@@ -466,6 +466,7 @@
self.assertEquals(test_list[2:-3000:-1],
[ c.text for c in root.c[2:-3000:-1] ])
+ # slice assignment
def test_setslice_complete(self):
Element = self.Element
From scoder at codespeak.net Fri Feb 8 15:39:59 2008
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Fri, 8 Feb 2008 15:39:59 +0100 (CET)
Subject: [Lxml-checkins] r51337 - in lxml/trunk: . src/lxml
Message-ID: <20080208143959.B153B168535@codespeak.net>
Author: scoder
Date: Fri Feb 8 15:39:54 2008
New Revision: 51337
Modified:
lxml/trunk/ (props changed)
lxml/trunk/src/lxml/lxml.objectify.pyx
Log:
r3434 at delle: sbehnel | 2008-02-08 15:39:12 +0100
fix objectify slicing for negative start/stop/step
Modified: lxml/trunk/src/lxml/lxml.objectify.pyx
==============================================================================
--- lxml/trunk/src/lxml/lxml.objectify.pyx (original)
+++ lxml/trunk/src/lxml/lxml.objectify.pyx Fri Feb 8 15:39:54 2008
@@ -242,13 +242,7 @@
if python._isString(key):
return _lookupChildOrRaise(self, key)
elif python.PySlice_Check(key):
- python.PySlice_GetIndicesEx(
- key, _countSiblings(self._c_node),
- &start, &stop, &step, &slicelength)
- if step < 0:
- return list(self)[start:stop:step]
- else:
- return list(islice(self, start, stop, step))
+ return list(self)[key]
# normal item access
c_self_node = self._c_node
c_parent = c_self_node.parent
From scoder at codespeak.net Fri Feb 8 19:44:08 2008
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Fri, 8 Feb 2008 19:44:08 +0100 (CET)
Subject: [Lxml-checkins] r51340 - in lxml/trunk: . src/lxml/tests
Message-ID: <20080208184408.7A76016852B@codespeak.net>
Author: scoder
Date: Fri Feb 8 19:44:06 2008
New Revision: 51340
Modified:
lxml/trunk/ (props changed)
lxml/trunk/src/lxml/tests/test_objectify.py
Log:
r3437 at delle: sbehnel | 2008-02-08 19:41:02 +0100
more slicing tests for objectify
Modified: lxml/trunk/src/lxml/tests/test_objectify.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_objectify.py (original)
+++ lxml/trunk/src/lxml/tests/test_objectify.py Fri Feb 8 19:44:06 2008
@@ -470,7 +470,6 @@
def test_setslice_complete(self):
Element = self.Element
- SubElement = self.etree.SubElement
root = Element("root")
root.c = ["c1", "c2"]
@@ -483,7 +482,6 @@
def test_setslice_elements(self):
Element = self.Element
- SubElement = self.etree.SubElement
root = Element("root")
root.c = ["c1", "c2"]
@@ -511,7 +509,6 @@
def test_setslice_partial(self):
Element = self.Element
- SubElement = self.etree.SubElement
root = Element("root")
l = ["c1", "c2", "c3", "c4"]
root.c = l
@@ -531,9 +528,77 @@
self.assertEquals(l,
[ c.text for c in root.c ])
+ def test_setslice_insert(self):
+ Element = self.Element
+ root = Element("root")
+ l = ["c1", "c2", "c3", "c4"]
+ root.c = l
+
+ self.assertEquals(["c1", "c2", "c3", "c4"],
+ [ c.text for c in root.c ])
+ self.assertEquals(l,
+ [ c.text for c in root.c ])
+
+ new_slice = ["cA", "cB"]
+ l[1:1] = new_slice
+ root.c[1:1] = new_slice
+
+ self.assertEquals(["c1", "cA", "cB", "c2", "c3", "c4"], l)
+ self.assertEquals(["c1", "cA", "cB", "c2", "c3", "c4"],
+ [ c.text for c in root.c ])
+ self.assertEquals(l,
+ [ c.text for c in root.c ])
+
+ def test_setslice_insert_neg(self):
+ Element = self.Element
+ root = Element("root")
+ l = ["c1", "c2", "c3", "c4"]
+ root.c = l
+
+ self.assertEquals(["c1", "c2", "c3", "c4"],
+ [ c.text for c in root.c ])
+ self.assertEquals(l,
+ [ c.text for c in root.c ])
+
+ new_slice = ["cA", "cB"]
+ l[-2:-2] = new_slice
+ root.c[-2:-2] = new_slice
+
+ self.assertEquals(["c1", "c2", "cA", "cB", "c3", "c4"], l)
+ self.assertEquals(["c1", "c2", "cA", "cB", "c3", "c4"],
+ [ c.text for c in root.c ])
+ self.assertEquals(l,
+ [ c.text for c in root.c ])
+
+ def test_setslice_empty(self):
+ Element = self.Element
+ root = Element("root")
+
+ root.c = []
+ self.assertRaises(
+ AttributeError, getattr, root, 'c')
+
+ def test_setslice_partial_wrong_length(self):
+ Element = self.Element
+ root = Element("root")
+ l = ["c1", "c2", "c3", "c4"]
+ root.c = l
+
+ self.assertEquals(["c1", "c2", "c3", "c4"],
+ [ c.text for c in root.c ])
+ self.assertEquals(l,
+ [ c.text for c in root.c ])
+
+ new_slice = ["cA", "cB", "cC"]
+ self.assertRaises(
+ ValueError, operator.setitem,
+ l, slice(1,2,-1), new_slice)
+ self.assertRaises(
+ ValueError, operator.setitem,
+ root.c, slice(1,2,-1), new_slice)
+
def test_setslice_partial_neg(self):
Element = self.Element
- SubElement = self.etree.SubElement
root = Element("root")
l = ["c1", "c2", "c3", "c4"]
root.c = l
@@ -547,8 +612,29 @@
l[-1:1:-1] = new_slice
root.c[-1:1:-1] = new_slice
- self.assertEquals(["c1", "c2", "cB", "cA", "c4"], l)
- self.assertEquals(["c1", "c2", "cB", "cA", "c4"],
+ self.assertEquals(["c1", "c2", "cB", "cA"], l)
+ self.assertEquals(["c1", "c2", "cB", "cA"],
+ [ c.text for c in root.c ])
+ self.assertEquals(l,
+ [ c.text for c in root.c ])
+
+ def test_setslice_partial_allneg(self):
+ Element = self.Element
+ root = Element("root")
+ l = ["c1", "c2", "c3", "c4"]
+ root.c = l
+
+ self.assertEquals(["c1", "c2", "c3", "c4"],
+ [ c.text for c in root.c ])
+ self.assertEquals(l,
+ [ c.text for c in root.c ])
+
+ new_slice = ["cA", "cB"]
+ l[-1:-4:-2] = new_slice
+ root.c[-1:-4:-2] = new_slice
+
+ self.assertEquals(["c1", "cB", "c3", "cA"], l)
+ self.assertEquals(["c1", "cB", "c3", "cA"],
[ c.text for c in root.c ])
self.assertEquals(l,
[ c.text for c in root.c ])
@@ -558,7 +644,6 @@
def test_set_string(self):
# make sure strings are not handled as sequences
Element = self.Element
- SubElement = self.etree.SubElement
root = Element("root")
root.c = "TEST"
self.assertEquals(["TEST"],
@@ -567,7 +652,6 @@
def test_setitem_string(self):
# make sure strings are set as children
Element = self.Element
- SubElement = self.etree.SubElement
root = Element("root")
root["c"] = "TEST"
self.assertEquals(["TEST"],
@@ -576,7 +660,6 @@
def test_setitem_string_special(self):
# make sure 'text' etc. are set as children
Element = self.Element
- SubElement = self.etree.SubElement
root = Element("root")
root["text"] = "TEST"
From scoder at codespeak.net Fri Feb 8 19:44:12 2008
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Fri, 8 Feb 2008 19:44:12 +0100 (CET)
Subject: [Lxml-checkins] r51341 - in lxml/trunk: . src/lxml
Message-ID: <20080208184412.DCC4C168531@codespeak.net>
Author: scoder
Date: Fri Feb 8 19:44:11 2008
New Revision: 51341
Modified:
lxml/trunk/ (props changed)
lxml/trunk/src/lxml/lxml.objectify.pyx
Log:
r3438 at delle: sbehnel | 2008-02-08 19:43:35 +0100
another rewrite of objectify._setSlice() to fix the handling of slice-overlapping elements
Modified: lxml/trunk/src/lxml/lxml.objectify.pyx
==============================================================================
--- lxml/trunk/src/lxml/lxml.objectify.pyx (original)
+++ lxml/trunk/src/lxml/lxml.objectify.pyx Fri Feb 8 19:44:11 2008
@@ -503,6 +503,17 @@
cdef _setSlice(slice, _Element target, items):
cdef _Element parent
+ cdef tree.xmlNode* c_node
+ cdef Py_ssize_t c_step, c_start, pos
+ # collect existing slice
+ if (slice).step is None:
+ c_step = 1
+ else:
+ c_step = (slice).step
+ if c_step == 0:
+ raise ValueError("Invalid slice")
+ del_items = target[slice]
+
# collect new values
new_items = []
tag = target.tag
@@ -512,33 +523,58 @@
new_element = cetree.deepcopyNodeToDocument(
target._doc, (<_Element>item)._c_node)
new_element.tag = tag
- python.PyList_Append(new_items, new_element)
else:
new_element = cetree.makeElement(
tag, target._doc, None, None, None, None, None)
_setElementValue(new_element, item)
- python.PyList_Append(new_items, new_element)
+ python.PyList_Append(new_items, new_element)
+
+ # sanity check - raise what a list would raise
+ if c_step != 1 and \
+ python.PyList_GET_SIZE(del_items) != python.PyList_GET_SIZE(new_items):
+ raise ValueError(
+ "attempt to assign sequence of size %d to extended slice of size %d" % (
+ python.PyList_GET_SIZE(new_items),
+ python.PyList_GET_SIZE(del_items)))
# replace existing items
- new_items = iter(new_items)
- del_items = iter(target[slice])
+ pos = 0
parent = target.getparent()
- try:
- next_item = new_items.next
- replace = parent.replace
- for el in del_items:
- item = next_item()
- replace(el, item)
- except StopIteration:
+ replace = parent.replace
+ while pos < python.PyList_GET_SIZE(new_items) and \
+ pos < python.PyList_GET_SIZE(del_items):
+ replace(del_items[pos], new_items[pos])
+ pos += 1
+ # remove leftover items
+ if pos < python.PyList_GET_SIZE(del_items):
remove = parent.remove
- remove(el)
- for el in del_items:
- remove(el)
- return
- else:
- # append remaining new items
- for item in new_items:
- _appendValue(parent, tag, item)
+ while pos < python.PyList_GET_SIZE(del_items):
+ remove(del_items[pos])
+ pos += 1
+ # append remaining new items
+ if pos < python.PyList_GET_SIZE(new_items):
+ # the sanity check above guarantees (step == 1)
+ if pos > 0:
+ item = new_items[pos-1]
+ else:
+ if (slice).start > 0:
+ c_node = parent._c_node.children
+ else:
+ c_node = parent._c_node.last
+ c_node = _findFollowingSibling(
+ c_node, tree._getNs(target._c_node), target._c_node.name,
+ (slice).start - 1)
+ if c_node is NULL:
+ while pos < python.PyList_GET_SIZE(new_items):
+ cetree.appendChild(parent, new_items[pos])
+ pos += 1
+ return
+ item = cetree.elementFactory(parent._doc, c_node)
+ while pos < python.PyList_GET_SIZE(new_items):
+ add = item.addnext
+ item = new_items[pos]
+ add(item)
+ pos += 1
################################################################################
# Data type support in subclasses
From scoder at codespeak.net Fri Feb 8 19:51:09 2008
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Fri, 8 Feb 2008 19:51:09 +0100 (CET)
Subject: [Lxml-checkins] r51342 - lxml/trunk
Message-ID: <20080208185109.54A9816852B@codespeak.net>
Author: scoder
Date: Fri Feb 8 19:51:08 2008
New Revision: 51342
Modified:
lxml/trunk/ (props changed)
lxml/trunk/CHANGES.txt
Log:
r3441 at delle: sbehnel | 2008-02-08 19:47:51 +0100
changelog
Modified: lxml/trunk/CHANGES.txt
==============================================================================
--- lxml/trunk/CHANGES.txt (original)
+++ lxml/trunk/CHANGES.txt Fri Feb 8 19:51:08 2008
@@ -2,6 +2,22 @@
lxml changelog
==============
+2.0.1 (Under development)
+=========================
+
+Features added
+--------------
+
+Bugs fixed
+----------
+
+* Setting an element slice in objectify could insert overlapping
+ elements at the wrong position.
+
+Other changes
+-------------
+
+
2.0 (2008-02-01)
================
From scoder at codespeak.net Fri Feb 8 19:51:12 2008
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Fri, 8 Feb 2008 19:51:12 +0100 (CET)
Subject: [Lxml-checkins] r51343 - lxml/trunk
Message-ID: <20080208185112.49FA216852E@codespeak.net>
Author: scoder
Date: Fri Feb 8 19:51:11 2008
New Revision: 51343
Modified:
lxml/trunk/ (props changed)
lxml/trunk/version.txt
Log:
r3442 at delle: sbehnel | 2008-02-08 19:48:15 +0100
version
Modified: lxml/trunk/version.txt
==============================================================================
--- lxml/trunk/version.txt (original)
+++ lxml/trunk/version.txt Fri Feb 8 19:51:11 2008
@@ -1 +1 @@
-2.0
+2.0.1
From scoder at codespeak.net Fri Feb 8 19:51:16 2008
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Fri, 8 Feb 2008 19:51:16 +0100 (CET)
Subject: [Lxml-checkins] r51344 - in lxml/trunk: . doc
Message-ID: <20080208185116.C1BAB168531@codespeak.net>
Author: scoder
Date: Fri Feb 8 19:51:16 2008
New Revision: 51344
Modified:
lxml/trunk/ (props changed)
lxml/trunk/doc/main.txt
Log:
r3443 at delle: sbehnel | 2008-02-08 19:50:40 +0100
version
Modified: lxml/trunk/doc/main.txt
==============================================================================
--- lxml/trunk/doc/main.txt (original)
+++ lxml/trunk/doc/main.txt Fri Feb 8 19:51:16 2008
@@ -145,8 +145,8 @@
.. _`lxml at the Python Package Index`: http://pypi.python.org/pypi/lxml/
.. _`this key`: pubkey.asc
-The latest version is `lxml 2.0`_, released 2008-02-01
-(`changes for 2.0`_). `Older versions`_ are listed below.
+The latest version is `lxml 2.0.1`_, released 2008-02-08
+(`changes for 2.0.1`_). `Older versions`_ are listed below.
.. _`Older versions`: #old-versions
@@ -206,21 +206,7 @@
Old Versions
------------
-* `lxml 2.0beta2`_, released 2008-01-26 (`changes for 2.0beta2`_)
-
-* `lxml 2.0beta1`_, released 2008-01-11 (`changes for 2.0beta1`_)
-
-* `lxml 2.0alpha6`_, released 2007-12-19 (`changes for 2.0alpha6`_)
-
-* `lxml 2.0alpha5`_, released 2007-11-24 (`changes for 2.0alpha5`_)
-
-* `lxml 2.0alpha4`_, released 2007-10-07 (`changes for 2.0alpha4`_)
-
-* `lxml 2.0alpha3`_, released 2007-09-26 (`changes for 2.0alpha3`_)
-
-* `lxml 2.0alpha2`_, released 2007-09-15 (`changes for 2.0alpha2`_)
-
-* `lxml 2.0alpha1`_, released 2007-09-02 (`changes for 2.0alpha1`_)
+* `lxml 2.0`_, released 2008-02-01 (`changes for 2.0`_)
* `lxml 1.3.6`_, released 2007-10-29 (`changes for 1.3.6`_)
@@ -272,15 +258,8 @@
* `lxml 0.5`_, released 2005-04-08
+.. _`lxml 2.0.1`: lxml-2.0.1.tgz
.. _`lxml 2.0`: lxml-2.0.tgz
-.. _`lxml 2.0beta2`: lxml-2.0beta2.tgz
-.. _`lxml 2.0beta1`: lxml-2.0beta1.tgz
-.. _`lxml 2.0alpha6`: lxml-2.0alpha6.tgz
-.. _`lxml 2.0alpha5`: lxml-2.0alpha5.tgz
-.. _`lxml 2.0alpha4`: lxml-2.0alpha4.tgz
-.. _`lxml 2.0alpha3`: lxml-2.0alpha3.tgz
-.. _`lxml 2.0alpha2`: lxml-2.0alpha2.tgz
-.. _`lxml 2.0alpha1`: lxml-2.0alpha1.tgz
.. _`lxml 1.3.6`: lxml-1.3.6.tgz
.. _`lxml 1.3.5`: lxml-1.3.5.tgz
.. _`lxml 1.3.4`: lxml-1.3.4.tgz
@@ -306,15 +285,8 @@
.. _`lxml 0.5.1`: lxml-0.5.1.tgz
.. _`lxml 0.5`: lxml-0.5.tgz
+.. _`changes for 2.0.1`: changes-2.0.1.html
.. _`changes for 2.0`: changes-2.0.html
-.. _`changes for 2.0beta2`: changes-2.0beta2.html
-.. _`changes for 2.0beta1`: changes-2.0beta1.html
-.. _`changes for 2.0alpha6`: changes-2.0alpha6.html
-.. _`changes for 2.0alpha5`: changes-2.0alpha5.html
-.. _`changes for 2.0alpha4`: changes-2.0alpha4.html
-.. _`changes for 2.0alpha3`: changes-2.0alpha3.html
-.. _`changes for 2.0alpha2`: changes-2.0alpha2.html
-.. _`changes for 2.0alpha1`: changes-2.0alpha1.html
.. _`changes for 1.3.6`: changes-1.3.6.html
.. _`changes for 1.3.5`: changes-1.3.5.html
.. _`changes for 1.3.4`: changes-1.3.4.html
From scoder at codespeak.net Fri Feb 8 20:39:59 2008
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Fri, 8 Feb 2008 20:39:59 +0100 (CET)
Subject: [Lxml-checkins] r51346 - in lxml/trunk: . src/lxml/tests
Message-ID: <20080208193959.AA98516853D@codespeak.net>
Author: scoder
Date: Fri Feb 8 20:39:58 2008
New Revision: 51346
Modified:
lxml/trunk/ (props changed)
lxml/trunk/src/lxml/tests/test_elementtree.py
Log:
r3447 at delle: sbehnel | 2008-02-08 20:00:25 +0100
new test case for setting a reversed slice with namespaces
Modified: lxml/trunk/src/lxml/tests/test_elementtree.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_elementtree.py (original)
+++ lxml/trunk/src/lxml/tests/test_elementtree.py Fri Feb 8 20:39:58 2008
@@ -2005,8 +2005,8 @@
self.assertEquals(
[d, c, b],
list(a))
-
- def test_setslice_all_replace_reversed_ns(self):
+
+ def test_setslice_all_replace_reversed_ns1(self):
Element = self.etree.Element
SubElement = self.etree.SubElement
@@ -2028,6 +2028,28 @@
[['{ns3}a3'], ['{ns2}a2'], ['{ns1}a1']],
[ child.attrib.keys() for child in a ])
+ def test_setslice_all_replace_reversed_ns2(self):
+ Element = self.etree.Element
+ SubElement = self.etree.SubElement
+
+ a = Element('{ns}a')
+ b = SubElement(a, '{ns1}b', {'{ns}a1': 'test'})
+ c = SubElement(a, '{ns2}c', {'{ns}a2': 'test'})
+ d = SubElement(a, '{ns3}d', {'{ns}a3': 'test'})
+
+ s = [d, c, b]
+ a[:] = s
+ self.assertEquals(
+ [d, c, b],
+ list(a))
+ self.assertEquals(
+ ['{ns3}d', '{ns2}c', '{ns1}b'],
+ [ child.tag for child in a ])
+
+ self.assertEquals(
+ [['{ns}a3'], ['{ns}a2'], ['{ns}a1']],
+ [ child.attrib.keys() for child in a ])
+
def test_setslice_end(self):
Element = self.etree.Element
SubElement = self.etree.SubElement
From scoder at codespeak.net Fri Feb 8 20:40:03 2008
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Fri, 8 Feb 2008 20:40:03 +0100 (CET)
Subject: [Lxml-checkins] r51347 - lxml/trunk
Message-ID: <20080208194003.AF49A16853E@codespeak.net>
Author: scoder
Date: Fri Feb 8 20:40:02 2008
New Revision: 51347
Modified:
lxml/trunk/ (props changed)
lxml/trunk/CHANGES.txt
Log:
r3448 at delle: sbehnel | 2008-02-08 20:37:40 +0100
changelog
Modified: lxml/trunk/CHANGES.txt
==============================================================================
--- lxml/trunk/CHANGES.txt (original)
+++ lxml/trunk/CHANGES.txt Fri Feb 8 20:40:02 2008
@@ -11,7 +11,7 @@
Bugs fixed
----------
-* Setting an element slice in objectify could insert overlapping
+* Setting an element slice in objectify could insert slice-overlapping
elements at the wrong position.
Other changes
From scoder at codespeak.net Fri Feb 8 20:40:07 2008
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Fri, 8 Feb 2008 20:40:07 +0100 (CET)
Subject: [Lxml-checkins] r51348 - lxml/trunk
Message-ID: <20080208194007.512EB16853F@codespeak.net>
Author: scoder
Date: Fri Feb 8 20:40:06 2008
New Revision: 51348
Modified:
lxml/trunk/ (props changed)
lxml/trunk/setupinfo.py
Log:
r3449 at delle: sbehnel | 2008-02-08 20:38:10 +0100
support overriding xslt-config in setup.py: --with-xslt-config=XXX
Modified: lxml/trunk/setupinfo.py
==============================================================================
--- lxml/trunk/setupinfo.py (original)
+++ lxml/trunk/setupinfo.py Fri Feb 8 20:40:06 2008
@@ -81,7 +81,7 @@
return static_library_dirs
# filter them from xslt-config --libs
result = []
- possible_library_dirs = flags('xslt-config --libs')
+ possible_library_dirs = flags('libs')
for possible_library_dir in possible_library_dirs:
if possible_library_dir.startswith('-L'):
result.append(possible_library_dir[2:])
@@ -95,7 +95,7 @@
return static_include_dirs
# filter them from xslt-config --cflags
result = []
- possible_include_dirs = flags('xslt-config --cflags')
+ possible_include_dirs = flags('cflags')
for possible_include_dir in possible_include_dirs:
if possible_include_dir.startswith('-I'):
result.append(possible_include_dir[2:])
@@ -114,7 +114,7 @@
return result
# anything from xslt-config --cflags that doesn't start with -I
- possible_cflags = flags('xslt-config --cflags')
+ possible_cflags = flags('cflags')
for possible_cflag in possible_cflags:
if not possible_cflag.startswith('-I'):
result.append(possible_cflag)
@@ -127,8 +127,9 @@
if OPTION_WITHOUT_THREADING:
macros.append(('WITHOUT_THREADING', None))
return macros
-
-def flags(cmd):
+
+def flags(option):
+ cmd = "%s --%s" % (find_xslt_config(), option)
try:
import subprocess
except ImportError:
@@ -145,6 +146,22 @@
print("** make sure the development packages of libxml2 and libxslt are installed **\n")
return str(rf.read()).split()
+XSLT_CONFIG = None
+
+def find_xslt_config():
+ global XSLT_CONFIG
+ if XSLT_CONFIG:
+ return XSLT_CONFIG
+ option = '--with-xslt-config='
+ for arg in sys.argv:
+ if arg.startswith(option):
+ sys.argv.remove(arg)
+ XSLT_CONFIG = arg[len(option):]
+ return XSLT_CONFIG
+ else:
+ XSLT_CONFIG = 'xslt-config'
+ return XSLT_CONFIG
+
def has_option(name):
try:
sys.argv.remove('--%s' % name)
From scoder at codespeak.net Fri Feb 8 20:43:39 2008
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Fri, 8 Feb 2008 20:43:39 +0100 (CET)
Subject: [Lxml-checkins] r51349 - in lxml/trunk: . doc
Message-ID: <20080208194339.1B48F16853D@codespeak.net>
Author: scoder
Date: Fri Feb 8 20:43:38 2008
New Revision: 51349
Modified:
lxml/trunk/ (props changed)
lxml/trunk/doc/build.txt
Log:
r3453 at delle: sbehnel | 2008-02-08 20:43:13 +0100
mention new --with-xslt-config option in build docs
Modified: lxml/trunk/doc/build.txt
==============================================================================
--- lxml/trunk/doc/build.txt (original)
+++ lxml/trunk/doc/build.txt Fri Feb 8 20:43:38 2008
@@ -88,9 +88,13 @@
If you get errors about missing header files (e.g. ``libxml/xmlversion.h``)
then you need to make sure the development packages of both libxml2
-and libxslt are properly installed. If this doesn't help, you may
-have to add the location of the header files to the include path
-like::
+and libxslt are properly installed. Try passing the following option to
+setup.py to make sure the right config is found::
+
+ python setup.py build --with-xslt-config=/path/to/xslt-config
+
+If this doesn't help, you may have to add the location of the header
+files to the include path like::
python setup.py build_ext -i -I /usr/include/libxml2
@@ -178,7 +182,11 @@
provided by fink or macports) are used at *build time*, you must take
care that the script ``xslt-config`` is found from the newly installed
version when running the build setup. The system libraries also
-provide this script, but the new one must come first in the PATH.
+provide this script, but the new one must come first in the PATH. The
+best way to make sure the right version is used is by passing the path
+to the script as an option to setup.py::
+
+ python setup.py build --with-xslt-config=/path/to/xslt-config
To make sure the newer libxml2 and libxslt versions are used at
*runtime*, you should add *all* directories where the newer libraries
From scoder at codespeak.net Fri Feb 8 20:45:10 2008
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Fri, 8 Feb 2008 20:45:10 +0100 (CET)
Subject: [Lxml-checkins] r51350 - lxml/trunk
Message-ID: <20080208194510.BF95F16853D@codespeak.net>
Author: scoder
Date: Fri Feb 8 20:45:10 2008
New Revision: 51350
Modified:
lxml/trunk/ (props changed)
lxml/trunk/CHANGES.txt
Log:
r3455 at delle: sbehnel | 2008-02-08 20:44:45 +0100
changelog
Modified: lxml/trunk/CHANGES.txt
==============================================================================
--- lxml/trunk/CHANGES.txt (original)
+++ lxml/trunk/CHANGES.txt Fri Feb 8 20:45:10 2008
@@ -17,6 +17,10 @@
Other changes
-------------
+* Passing ``--with-xslt-config=/path/to/xslt-config`` to setup.py will
+ override the ``xslt-config`` script that is used to determine the C
+ compiler options.
+
2.0 (2008-02-01)
================
From scoder at codespeak.net Sat Feb 9 18:31:28 2008
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Sat, 9 Feb 2008 18:31:28 +0100 (CET)
Subject: [Lxml-checkins] r51361 - in lxml/trunk: . doc
Message-ID: <20080209173128.5F1BD1684EF@codespeak.net>
Author: scoder
Date: Sat Feb 9 18:31:26 2008
New Revision: 51361
Modified:
lxml/trunk/ (props changed)
lxml/trunk/doc/performance.txt
Log:
r3457 at delle: sbehnel | 2008-02-09 18:30:53 +0100
updated benchmark results
Modified: lxml/trunk/doc/performance.txt
==============================================================================
--- lxml/trunk/doc/performance.txt (original)
+++ lxml/trunk/doc/performance.txt Sat Feb 9 18:31:26 2008
@@ -71,8 +71,8 @@
a specific part of the API yourself, please consider sending it to the lxml
mailing list.
-The timings cited below compare lxml 2.0alpha (with libxml2 2.6.30) to
-the December 2007 SVN trunk versions of ElementTree (1.3) and
+The timings cited below compare lxml 2.0 final (with libxml2 2.6.31)
+to the January 2008 SVN trunk versions of ElementTree (1.3) and
cElementTree (1.2.7). They were run single-threaded on a 1.8GHz Intel
Core Duo machine under Ubuntu Linux 7.10 (Gutsy). The C libraries
were compiled with the same platform specific optimisation flags. The
@@ -117,23 +117,23 @@
1.2, lxml is still more than 5 times as fast as the much improved
ElementTree 1.3::
- lxe: tostring_utf16 (SATR T1) 23.4821 msec/pass
+ lxe: tostring_utf16 (SATR T1) 19.0921 msec/pass
cET: tostring_utf16 (SATR T1) 129.8430 msec/pass
ET : tostring_utf16 (SATR T1) 136.1301 msec/pass
- lxe: tostring_utf16 (UATR T1) 23.4859 msec/pass
+ lxe: tostring_utf16 (UATR T1) 20.4630 msec/pass
cET: tostring_utf16 (UATR T1) 130.1570 msec/pass
ET : tostring_utf16 (UATR T1) 136.3101 msec/pass
- lxe: tostring_utf16 (S-TR T2) 24.2729 msec/pass
+ lxe: tostring_utf16 (S-TR T2) 18.8632 msec/pass
cET: tostring_utf16 (S-TR T2) 136.9388 msec/pass
ET : tostring_utf16 (S-TR T2) 143.9550 msec/pass
- lxe: tostring_utf8 (S-TR T2) 18.4860 msec/pass
+ lxe: tostring_utf8 (S-TR T2) 14.4310 msec/pass
cET: tostring_utf8 (S-TR T2) 137.0859 msec/pass
ET : tostring_utf8 (S-TR T2) 144.3110 msec/pass
- lxe: tostring_utf8 (U-TR T3) 2.7399 msec/pass
+ lxe: tostring_utf8 (U-TR T3) 2.6381 msec/pass
cET: tostring_utf8 (U-TR T3) 52.1040 msec/pass
ET : tostring_utf8 (U-TR T3) 53.1070 msec/pass
@@ -205,10 +205,10 @@
(given in seconds)::
lxe: -- S- U- -A SA UA
- T1: 0.0914 0.0875 0.0872 0.0892 0.0882 0.0900
- T2: 0.0894 0.0897 0.0892 0.0988 0.0978 0.0974
- T3: 0.0219 0.0194 0.0189 0.0570 0.0570 0.0573
- T4: 0.0004 0.0003 0.0003 0.0012 0.0012 0.0012
+ T1: 0.0783 0.0777 0.0774 0.0787 0.0781 0.0783
+ T2: 0.0799 0.0796 0.0799 0.0879 0.0882 0.0886
+ T3: 0.0245 0.0216 0.0217 0.0577 0.0575 0.0572
+ T4: 0.0003 0.0003 0.0003 0.0011 0.0011 0.0011
cET: -- S- U- -A SA UA
T1: 0.0272 0.0264 0.0267 0.0268 0.0261 0.0265
T2: 0.0280 0.0274 0.0273 0.0273 0.0276 0.0275
@@ -235,21 +235,21 @@
create a shallow copy of their list of children, lxml has to create a
Python object for each child and collect them in a list::
- lxe: root_list_children (--TR T1) 0.0169 msec/pass
+ lxe: root_list_children (--TR T1) 0.0160 msec/pass
cET: root_list_children (--TR T1) 0.0081 msec/pass
ET : root_list_children (--TR T1) 0.0541 msec/pass
- lxe: root_list_children (--TR T2) 0.2339 msec/pass
+ lxe: root_list_children (--TR T2) 0.2100 msec/pass
cET: root_list_children (--TR T2) 0.0319 msec/pass
ET : root_list_children (--TR T2) 0.4420 msec/pass
This handicap is also visible when accessing single children::
- lxe: first_child (--TR T2) 0.2470 msec/pass
+ lxe: first_child (--TR T2) 0.2429 msec/pass
cET: first_child (--TR T2) 0.2170 msec/pass
ET : first_child (--TR T2) 0.9968 msec/pass
- lxe: last_child (--TR T1) 0.2482 msec/pass
+ lxe: last_child (--TR T1) 0.2470 msec/pass
cET: last_child (--TR T1) 0.2291 msec/pass
ET : last_child (--TR T1) 0.9830 msec/pass
@@ -258,11 +258,11 @@
The data structure used by libxml2 is a linked tree, and thus, a
linked list of children::
- lxe: middle_child (--TR T1) 0.2789 msec/pass
+ lxe: middle_child (--TR T1) 0.2759 msec/pass
cET: middle_child (--TR T1) 0.2229 msec/pass
ET : middle_child (--TR T1) 1.0030 msec/pass
- lxe: middle_child (--TR T2) 1.9610 msec/pass
+ lxe: middle_child (--TR T2) 1.7071 msec/pass
cET: middle_child (--TR T2) 0.2229 msec/pass
ET : middle_child (--TR T2) 0.9930 msec/pass
@@ -274,7 +274,7 @@
in. This results in a major performance difference for creating independent
Elements that end up in independently created documents::
- lxe: create_elements (--TC T2) 3.1691 msec/pass
+ lxe: create_elements (--TC T2) 2.8961 msec/pass
cET: create_elements (--TC T2) 0.1929 msec/pass
ET : create_elements (--TC T2) 1.3590 msec/pass
@@ -282,11 +282,11 @@
are supposed to end up in, either as SubElements of an Element or using the
explicit ``Element.makeelement()`` call::
- lxe: makeelement (--TC T2) 2.2650 msec/pass
+ lxe: makeelement (--TC T2) 1.9000 msec/pass
cET: makeelement (--TC T2) 0.3211 msec/pass
ET : makeelement (--TC T2) 1.6358 msec/pass
- lxe: create_subelements (--TC T2) 1.9531 msec/pass
+ lxe: create_subelements (--TC T2) 1.7891 msec/pass
cET: create_subelements (--TC T2) 0.2351 msec/pass
ET : create_subelements (--TC T2) 3.2270 msec/pass
@@ -305,11 +305,11 @@
The following benchmark appends all root children of the second tree to the
root of the first tree::
- lxe: append_from_document (--TR T1,T2) 3.8681 msec/pass
+ lxe: append_from_document (--TR T1,T2) 3.3841 msec/pass
cET: append_from_document (--TR T1,T2) 0.2699 msec/pass
ET : append_from_document (--TR T1,T2) 1.2650 msec/pass
- lxe: append_from_document (--TR T3,T4) 0.0570 msec/pass
+ lxe: append_from_document (--TR T3,T4) 0.0441 msec/pass
cET: append_from_document (--TR T3,T4) 0.0169 msec/pass
ET : append_from_document (--TR T3,T4) 0.0820 msec/pass
@@ -322,20 +322,20 @@
This difference is not always as visible, but applies to most parts of the
API, like inserting newly created elements::
- lxe: insert_from_document (--TR T1,T2) 5.8019 msec/pass
+ lxe: insert_from_document (--TR T1,T2) 5.7020 msec/pass
cET: insert_from_document (--TR T1,T2) 0.4041 msec/pass
ET : insert_from_document (--TR T1,T2) 1.4789 msec/pass
or replacing the child slice by a newly created element::
- lxe: replace_children_element (--TC T1) 0.2480 msec/pass
+ lxe: replace_children_element (--TC T1) 0.2210 msec/pass
cET: replace_children_element (--TC T1) 0.0238 msec/pass
ET : replace_children_element (--TC T1) 0.1600 msec/pass
as opposed to replacing the slice with an existing element from the
same document::
- lxe: replace_children (--TC T1) 0.0188 msec/pass
+ lxe: replace_children (--TC T1) 0.0179 msec/pass
cET: replace_children (--TC T1) 0.0119 msec/pass
ET : replace_children (--TC T1) 0.0739 msec/pass
@@ -347,16 +347,16 @@
Deep copying a tree is fast in lxml::
- lxe: deepcopy_all (--TR T1) 10.9420 msec/pass
+ lxe: deepcopy_all (--TR T1) 9.7558 msec/pass
cET: deepcopy_all (--TR T1) 120.6188 msec/pass
ET : deepcopy_all (--TR T1) 902.6880 msec/pass
- lxe: deepcopy_all (-ATR T2) 12.5830 msec/pass
+ lxe: deepcopy_all (-ATR T2) 12.3210 msec/pass
cET: deepcopy_all (-ATR T2) 136.9810 msec/pass
ET : deepcopy_all (-ATR T2) 944.2801 msec/pass
- lxe: deepcopy_all (S-TR T3) 4.1170 msec/pass
- cET: deepcopy_all (S-TR T3) 36.1221 msec/pass
+ lxe: deepcopy_all (S-TR T3) 8.3981 msec/pass
+ cET: deepcopy_all (S-TR T3) 35.6541 msec/pass
ET : deepcopy_all (S-TR T3) 221.6041 msec/pass
So, for example, if you have a database-like scenario where you parse in a
@@ -372,37 +372,37 @@
especially if few elements are of interest or the target element tag name is
known, lxml is a good choice::
- lxe: getiterator_all (--TR T1) 5.8582 msec/pass
+ lxe: getiterator_all (--TR T1) 5.7251 msec/pass
cET: getiterator_all (--TR T1) 39.9489 msec/pass
ET : getiterator_all (--TR T1) 23.0000 msec/pass
- lxe: getiterator_islice (--TR T2) 0.0780 msec/pass
+ lxe: getiterator_islice (--TR T2) 0.0830 msec/pass
cET: getiterator_islice (--TR T2) 0.3440 msec/pass
ET : getiterator_islice (--TR T2) 0.2429 msec/pass
- lxe: getiterator_tag (--TR T2) 0.3119 msec/pass
+ lxe: getiterator_tag (--TR T2) 0.3011 msec/pass
cET: getiterator_tag (--TR T2) 14.1001 msec/pass
ET : getiterator_tag (--TR T2) 7.4241 msec/pass
- lxe: getiterator_tag_all (--TR T2) 0.6540 msec/pass
+ lxe: getiterator_tag_all (--TR T2) 0.6340 msec/pass
cET: getiterator_tag_all (--TR T2) 40.7901 msec/pass
ET : getiterator_tag_all (--TR T2) 21.0390 msec/pass
This translates directly into similar timings for ``Element.findall()``::
- lxe: findall (--TR T2) 8.1239 msec/pass
+ lxe: findall (--TR T2) 7.8950 msec/pass
cET: findall (--TR T2) 44.5340 msec/pass
ET : findall (--TR T2) 27.1149 msec/pass
- lxe: findall (--TR T3) 1.6870 msec/pass
+ lxe: findall (--TR T3) 1.7281 msec/pass
cET: findall (--TR T3) 12.9611 msec/pass
ET : findall (--TR T3) 8.6131 msec/pass
- lxe: findall_tag (--TR T2) 0.7660 msec/pass
+ lxe: findall_tag (--TR T2) 0.7720 msec/pass
cET: findall_tag (--TR T2) 40.6358 msec/pass
ET : findall_tag (--TR T2) 21.4581 msec/pass
- lxe: findall_tag (--TR T3) 0.2160 msec/pass
+ lxe: findall_tag (--TR T3) 0.2050 msec/pass
cET: findall_tag (--TR T3) 9.6831 msec/pass
ET : findall_tag (--TR T3) 5.2109 msec/pass
@@ -420,38 +420,38 @@
of the lxml API you use. The most straightforward way is to call the
``xpath()`` method on an Element or ElementTree::
- lxe: xpath_method (--TC T1) 1.8251 msec/pass
- lxe: xpath_method (--TC T2) 23.3159 msec/pass
- lxe: xpath_method (--TC T3) 0.1378 msec/pass
- lxe: xpath_method (--TC T4) 1.1270 msec/pass
+ lxe: xpath_method (--TC T1) 1.7459 msec/pass
+ lxe: xpath_method (--TC T2) 22.0850 msec/pass
+ lxe: xpath_method (--TC T3) 0.1309 msec/pass
+ lxe: xpath_method (--TC T4) 1.0772 msec/pass
This is well suited for testing and when the XPath expressions are as diverse
as the trees they are called on. However, if you have a single XPath
expression that you want to apply to a larger number of different elements,
the ``XPath`` class is the most efficient way to do it::
- lxe: xpath_class (--TC T1) 0.6981 msec/pass
- lxe: xpath_class (--TC T2) 3.6111 msec/pass
- lxe: xpath_class (--TC T3) 0.0591 msec/pass
- lxe: xpath_class (--TC T4) 0.1979 msec/pass
+ lxe: xpath_class (--TC T1) 0.6740 msec/pass
+ lxe: xpath_class (--TC T2) 3.1760 msec/pass
+ lxe: xpath_class (--TC T3) 0.0548 msec/pass
+ lxe: xpath_class (--TC T4) 0.1700 msec/pass
Note that this still allows you to use variables in the expression, so you can
parse it once and then adapt it through variables at call time. In other
cases, where you have a fixed Element or ElementTree and want to run different
expressions on it, you should consider the ``XPathEvaluator``::
- lxe: xpath_element (--TR T1) 0.4342 msec/pass
- lxe: xpath_element (--TR T2) 11.9958 msec/pass
- lxe: xpath_element (--TR T3) 0.1690 msec/pass
- lxe: xpath_element (--TR T4) 0.3510 msec/pass
+ lxe: xpath_element (--TR T1) 0.4151 msec/pass
+ lxe: xpath_element (--TR T2) 11.6129 msec/pass
+ lxe: xpath_element (--TR T3) 0.1299 msec/pass
+ lxe: xpath_element (--TR T4) 0.3409 msec/pass
While it looks slightly slower, creating an XPath object for each of the
expressions generates a much higher overhead here::
- lxe: xpath_class_repeat (--TC T1) 1.7619 msec/pass
- lxe: xpath_class_repeat (--TC T2) 21.9102 msec/pass
- lxe: xpath_class_repeat (--TC T3) 0.1330 msec/pass
- lxe: xpath_class_repeat (--TC T4) 1.0631 msec/pass
+ lxe: xpath_class_repeat (--TC T1) 1.6699 msec/pass
+ lxe: xpath_class_repeat (--TC T2) 20.4420 msec/pass
+ lxe: xpath_class_repeat (--TC T3) 0.1230 msec/pass
+ lxe: xpath_class_repeat (--TC T4) 0.9859 msec/pass
A longer example
@@ -608,21 +608,21 @@
tree. It avoids step-by-step Python element instantiations along the path,
which can substantially improve the access time::
- lxe: attribute (--TR T1) 9.8128 msec/pass
- lxe: attribute (--TR T2) 53.2899 msec/pass
- lxe: attribute (--TR T4) 9.6800 msec/pass
-
- lxe: objectpath (--TR T1) 5.4898 msec/pass
- lxe: objectpath (--TR T2) 48.4819 msec/pass
- lxe: objectpath (--TR T4) 5.3761 msec/pass
-
- lxe: attributes_deep (--TR T1) 56.3290 msec/pass
- lxe: attributes_deep (--TR T2) 62.4361 msec/pass
- lxe: attributes_deep (--TR T4) 15.8000 msec/pass
-
- lxe: objectpath_deep (--TR T1) 49.0060 msec/pass
- lxe: objectpath_deep (--TR T2) 52.5169 msec/pass
- lxe: objectpath_deep (--TR T4) 7.1371 msec/pass
+ lxe: attribute (--TR T1) 9.4581 msec/pass
+ lxe: attribute (--TR T2) 52.5560 msec/pass
+ lxe: attribute (--TR T4) 9.1729 msec/pass
+
+ lxe: objectpath (--TR T1) 4.8690 msec/pass
+ lxe: objectpath (--TR T2) 47.8780 msec/pass
+ lxe: objectpath (--TR T4) 4.7870 msec/pass
+
+ lxe: attributes_deep (--TR T1) 54.7471 msec/pass
+ lxe: attributes_deep (--TR T2) 62.7451 msec/pass
+ lxe: attributes_deep (--TR T4) 15.1050 msec/pass
+
+ lxe: objectpath_deep (--TR T1) 48.2810 msec/pass
+ lxe: objectpath_deep (--TR T2) 51.3949 msec/pass
+ lxe: objectpath_deep (--TR T4) 6.1419 msec/pass
Note, however, that parsing ObjectPath expressions is not free either, so
this is most effective for frequently accessing the same element.
@@ -648,17 +648,17 @@
subtrees and elements) to cache, you can trade memory usage against access
speed::
- lxe: attribute_cached (--TR T1) 7.6170 msec/pass
- lxe: attribute_cached (--TR T2) 50.7941 msec/pass
- lxe: attribute_cached (--TR T4) 7.4880 msec/pass
-
- lxe: attributes_deep_cached (--TR T1) 49.9220 msec/pass
- lxe: attributes_deep_cached (--TR T2) 55.9340 msec/pass
- lxe: attributes_deep_cached (--TR T4) 10.0131 msec/pass
-
- lxe: objectpath_deep_cached (--TR T1) 44.9121 msec/pass
- lxe: objectpath_deep_cached (--TR T2) 48.2371 msec/pass
- lxe: objectpath_deep_cached (--TR T4) 3.9630 msec/pass
+ lxe: attribute_cached (--TR T1) 7.5061 msec/pass
+ lxe: attribute_cached (--TR T2) 50.1881 msec/pass
+ lxe: attribute_cached (--TR T4) 7.4170 msec/pass
+
+ lxe: attributes_deep_cached (--TR T1) 48.7239 msec/pass
+ lxe: attributes_deep_cached (--TR T2) 55.2199 msec/pass
+ lxe: attributes_deep_cached (--TR T4) 9.9740 msec/pass
+
+ lxe: objectpath_deep_cached (--TR T1) 43.4160 msec/pass
+ lxe: objectpath_deep_cached (--TR T2) 47.6480 msec/pass
+ lxe: objectpath_deep_cached (--TR T4) 3.4680 msec/pass
Things to note: you cannot currently use ``weakref.WeakKeyDictionary`` objects
for this as lxml's element objects do not support weak references (which are
From scoder at codespeak.net Sat Feb 9 18:37:36 2008
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Sat, 9 Feb 2008 18:37:36 +0100 (CET)
Subject: [Lxml-checkins] r51362 - in lxml/trunk: . doc
Message-ID: <20080209173736.5B9281684F8@codespeak.net>
Author: scoder
Date: Sat Feb 9 18:37:34 2008
New Revision: 51362
Modified:
lxml/trunk/ (props changed)
lxml/trunk/doc/performance.txt
Log:
r3459 at delle: sbehnel | 2008-02-09 18:37:06 +0100
small doc fix
Modified: lxml/trunk/doc/performance.txt
==============================================================================
--- lxml/trunk/doc/performance.txt (original)
+++ lxml/trunk/doc/performance.txt Sat Feb 9 18:37:34 2008
@@ -72,7 +72,7 @@
mailing list.
The timings cited below compare lxml 2.0 final (with libxml2 2.6.31)
-to the January 2008 SVN trunk versions of ElementTree (1.3) and
+to the January 2008 SVN trunk versions of ElementTree (1.3alpha) and
cElementTree (1.2.7). They were run single-threaded on a 1.8GHz Intel
Core Duo machine under Ubuntu Linux 7.10 (Gutsy). The C libraries
were compiled with the same platform specific optimisation flags. The
From scoder at codespeak.net Mon Feb 11 19:28:03 2008
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Mon, 11 Feb 2008 19:28:03 +0100 (CET)
Subject: [Lxml-checkins] r51394 - in lxml/trunk: . src/lxml/html
src/lxml/html/tests
Message-ID: <20080211182803.9A6021683FA@codespeak.net>
Author: scoder
Date: Mon Feb 11 19:28:01 2008
New Revision: 51394
Modified:
lxml/trunk/ (props changed)
lxml/trunk/src/lxml/html/diff.py
lxml/trunk/src/lxml/html/tests/test_diff.txt
Log:
r3461 at delle: sbehnel | 2008-02-11 19:27:27 +0100
code simplification
Modified: lxml/trunk/src/lxml/html/diff.py
==============================================================================
--- lxml/trunk/src/lxml/html/diff.py (original)
+++ lxml/trunk/src/lxml/html/diff.py Mon Feb 11 19:28:01 2008
@@ -738,29 +738,17 @@
If skip_outer is true, then don't serialize the outermost tag
"""
-
- html_xsl = """\
-
-
-
-
-
-
-"""
- transform = etree.XSLT(etree.XML(html_xsl))
assert not isinstance(el, basestring), (
"You should pass in an element, not a string like %r" % el)
- html = str(transform(el))
+ html = etree.tostring(el, method="html", encoding="UTF-8")
if skip_outer:
# Get rid of the extra starting tag:
html = html[html.find('>')+1:]
- if skip_outer:
# Get rid of the extra end tag:
html = html[:html.rfind('<')]
- if skip_outer:
return html.strip()
else:
- return html.lstrip()
+ return html
def _fixup_ins_del_tags(doc):
"""fixup_ins_del_tags that works on an lxml document in-place
Modified: lxml/trunk/src/lxml/html/tests/test_diff.txt
==============================================================================
--- lxml/trunk/src/lxml/html/tests/test_diff.txt (original)
+++ lxml/trunk/src/lxml/html/tests/test_diff.txt Mon Feb 11 19:28:01 2008
@@ -204,10 +204,7 @@
Some text and
more text
>>> pfixup('''
...
One table
More stuff
''')
-
-
One table
-
More stuff
-
+
One table
More stuff
Testing split_unbalanced::
From ianb at codespeak.net Wed Feb 13 05:24:51 2008
From: ianb at codespeak.net (ianb at codespeak.net)
Date: Wed, 13 Feb 2008 05:24:51 +0100 (CET)
Subject: [Lxml-checkins] r51426 - lxml/trunk/src/lxml
Message-ID: <20080213042451.7F1311683BE@codespeak.net>
Author: ianb
Date: Wed Feb 13 05:24:50 2008
New Revision: 51426
Modified:
lxml/trunk/src/lxml/doctestcompare.py
Log:
add NOPARSE_MARKUP to __all__
Modified: lxml/trunk/src/lxml/doctestcompare.py
==============================================================================
--- lxml/trunk/src/lxml/doctestcompare.py (original)
+++ lxml/trunk/src/lxml/doctestcompare.py Wed Feb 13 05:24:50 2008
@@ -32,7 +32,7 @@
import doctest
import cgi
-__all__ = ['PARSE_HTML', 'PARSE_XML', 'LXMLOutputChecker',
+__all__ = ['PARSE_HTML', 'PARSE_XML', 'NOPARSE_MARKUP', 'LXMLOutputChecker',
'LHTMLOutputChecker', 'install', 'temp_install']
PARSE_HTML = doctest.register_optionflag('PARSE_HTML')
From ianb at codespeak.net Wed Feb 13 05:25:17 2008
From: ianb at codespeak.net (ianb at codespeak.net)
Date: Wed, 13 Feb 2008 05:25:17 +0100 (CET)
Subject: [Lxml-checkins] r51427 - in lxml/trunk: . src/lxml/html
src/lxml/html/tests
Message-ID: <20080213042517.926221683BE@codespeak.net>
Author: ianb
Date: Wed Feb 13 05:25:16 2008
New Revision: 51427
Modified:
lxml/trunk/CHANGES.txt
lxml/trunk/src/lxml/html/__init__.py
lxml/trunk/src/lxml/html/tests/test_forms.txt
Log:
Fix case when inputs have no name
Modified: lxml/trunk/CHANGES.txt
==============================================================================
--- lxml/trunk/CHANGES.txt (original)
+++ lxml/trunk/CHANGES.txt Wed Feb 13 05:25:16 2008
@@ -40,6 +40,10 @@
Bugs fixed
----------
+* Form elements would return None for ``form.fields.keys()`` if there
+ was an unnamed input field. Now unnamed input fields are completely
+ ignored.
+
Other changes
-------------
Modified: lxml/trunk/src/lxml/html/__init__.py
==============================================================================
--- lxml/trunk/src/lxml/html/__init__.py (original)
+++ lxml/trunk/src/lxml/html/__init__.py Wed Feb 13 05:25:16 2008
@@ -818,7 +818,8 @@
def keys(self):
names = sets.Set()
for el in self:
- names.add(el.name)
+ if el.name is not None:
+ names.add(el.name)
return list(names)
def __iter__(self):
Modified: lxml/trunk/src/lxml/html/tests/test_forms.txt
==============================================================================
--- lxml/trunk/src/lxml/html/tests/test_forms.txt (original)
+++ lxml/trunk/src/lxml/html/tests/test_forms.txt Wed Feb 13 05:25:16 2008
@@ -141,3 +141,24 @@
single_checkbox2: 'good'
check_group:
+>>> import lxml.html
+>>> tree = lxml.html.fromstring('''
+...
+...
+...
+... ''')
+>>> tree # doctest: +ELLIPSIS
+
+>>> tree.forms[0] # doctest: +ELLIPSIS
+
+>>> tree.forms[0].fields # doctest: +NOPARSE_MARKUP
+
+>>> tree.forms[0].fields.keys()
+['foo']
+>>> tree.forms[0].fields.items()
+[('foo', 'bar')]
+>>> tree.forms[0].fields.values()
+['bar']
From scoder at codespeak.net Wed Feb 13 21:48:40 2008
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Wed, 13 Feb 2008 21:48:40 +0100 (CET)
Subject: [Lxml-checkins] r51452 - in lxml/trunk: . src/lxml src/lxml/html
Message-ID: <20080213204840.667B11683FE@codespeak.net>
Author: scoder
Date: Wed Feb 13 21:48:39 2008
New Revision: 51452
Modified:
lxml/trunk/ (props changed)
lxml/trunk/CHANGES.txt
lxml/trunk/Makefile
lxml/trunk/src/lxml/classlookup.pxi
lxml/trunk/src/lxml/docloader.pxi
lxml/trunk/src/lxml/dtd.pxi
lxml/trunk/src/lxml/extensions.pxi
lxml/trunk/src/lxml/html/usedoctest.py
lxml/trunk/src/lxml/iterparse.pxi
lxml/trunk/src/lxml/lxml.etree.pyx
lxml/trunk/src/lxml/lxml.objectify.pyx
lxml/trunk/src/lxml/nsclasses.pxi
lxml/trunk/src/lxml/objectpath.pxi
lxml/trunk/src/lxml/parser.pxi
lxml/trunk/src/lxml/relaxng.pxi
lxml/trunk/src/lxml/schematron.pxi
lxml/trunk/src/lxml/usedoctest.py
lxml/trunk/src/lxml/xinclude.pxi
lxml/trunk/src/lxml/xmlerror.pxi
lxml/trunk/src/lxml/xmlid.pxi
lxml/trunk/src/lxml/xmlschema.pxi
lxml/trunk/src/lxml/xpath.pxi
lxml/trunk/src/lxml/xslt.pxi
Log:
r3463 at delle: sbehnel | 2008-02-13 00:07:06 +0100
huge docstring update to make signatures visible
Modified: lxml/trunk/CHANGES.txt
==============================================================================
--- lxml/trunk/CHANGES.txt (original)
+++ lxml/trunk/CHANGES.txt Wed Feb 13 21:48:39 2008
@@ -8,6 +8,9 @@
Features added
--------------
+* Docstrings now reflect the signature of functions and methods to
+ make them visible in API docs and ``help()``
+
Bugs fixed
----------
Modified: lxml/trunk/Makefile
==============================================================================
--- lxml/trunk/Makefile (original)
+++ lxml/trunk/Makefile Wed Feb 13 21:48:39 2008
@@ -42,7 +42,7 @@
rm -fr doc/html/api
@[ -x "`which epydoc`" ] \
&& (cd src && echo "Generating API docs ..." && \
- PYTHONPATH=. epydoc -v -o ../doc/html/api --name lxml --url http://codespeak.net/lxml/ lxml/) \
+ PYTHONPATH=. epydoc -v --docformat "restructuredtext en" -o ../doc/html/api --name lxml --url http://codespeak.net/lxml/ lxml/) \
|| (echo "not generating epydoc API documentation")
# XXX What should the default be?
Modified: lxml/trunk/src/lxml/classlookup.pxi
==============================================================================
--- lxml/trunk/src/lxml/classlookup.pxi (original)
+++ lxml/trunk/src/lxml/classlookup.pxi Wed Feb 13 21:48:39 2008
@@ -53,7 +53,9 @@
# class to store element class lookup functions
cdef public class ElementClassLookup [ type LxmlElementClassLookupType,
object LxmlElementClassLookup ]:
- """Superclass of Element class lookups.
+ """ElementClassLookup(self)
+
+ Superclass of Element class lookups.
"""
cdef _element_class_lookup_function _lookup_function
def __init__(self):
@@ -62,18 +64,20 @@
cdef public class FallbackElementClassLookup(ElementClassLookup) \
[ type LxmlFallbackElementClassLookupType,
object LxmlFallbackElementClassLookup ]:
- """Superclass of Element class lookups with additional fallback.
+ """FallbackElementClassLookup(self, fallback=None)
+
+ Superclass of Element class lookups with additional fallback.
"""
cdef readonly ElementClassLookup fallback
cdef _element_class_lookup_function _fallback_function
def __init__(self, ElementClassLookup fallback=None):
self._lookup_function = NULL # use default lookup
if fallback is not None:
- self.setFallback(fallback)
+ self._setFallback(fallback)
else:
self._fallback_function = _lookupDefaultElementClass
- def setFallback(self, ElementClassLookup lookup not None):
+ cdef void _setFallback(self, ElementClassLookup lookup):
"""Sets the fallback scheme for this lookup method.
"""
self.fallback = lookup
@@ -81,6 +85,20 @@
if self._fallback_function is NULL:
self._fallback_function = _lookupDefaultElementClass
+ def set_fallback(self, ElementClassLookup lookup not None):
+ """set_fallback(self, lookup)
+
+ Sets the fallback scheme for this lookup method.
+ """
+ self._setFallback(lookup)
+
+ def setFallback(self, ElementClassLookup lookup not None):
+ """Sets the fallback scheme for this lookup method.
+
+ :deprecated: use ``set_fallback()`` instead.
+ """
+ self._setFallback(lookup)
+
cdef object _callFallback(self, _Document doc, xmlNode* c_node):
return self._fallback_function(self.fallback, doc, c_node)
@@ -89,7 +107,8 @@
# Custom Element class lookup schemes
cdef class ElementDefaultClassLookup(ElementClassLookup):
- """Element class lookup scheme that always returns the default Element
+ """ElementDefaultClassLookup(self, element=None, comment=None, pi=None, entity=None)
+ Element class lookup scheme that always returns the default Element
class.
The keyword arguments ``element``, ``comment``, ``pi`` and ``entity``
@@ -163,13 +182,14 @@
assert 0, "Unknown node type: %s" % c_node.type
cdef class AttributeBasedElementClassLookup(FallbackElementClassLookup):
- """Checks an attribute of an Element and looks up the value in a class
- dictionary.
+ """AttributeBasedElementClassLookup(self, attribute_name, class_mapping, fallback=None)
+ Checks an attribute of an Element and looks up the value in a
+ class dictionary.
Arguments:
- * attribute name - '{ns}name' style string
- * class mapping - Python dict mapping attribute values to Element classes
- * fallback - optional fallback lookup mechanism
+ - attribute name - '{ns}name' style string
+ - class mapping - Python dict mapping attribute values to Element classes
+ - fallback - optional fallback lookup mechanism
A None key in the class mapping will be checked if the attribute is
missing.
@@ -207,7 +227,8 @@
cdef class ParserBasedElementClassLookup(FallbackElementClassLookup):
- """Element class lookup based on the XML parser.
+ """ParserBasedElementClassLookup(self, fallback=None)
+ Element class lookup based on the XML parser.
"""
def __init__(self, ElementClassLookup fallback=None):
FallbackElementClassLookup.__init__(self, fallback)
@@ -221,7 +242,8 @@
cdef class CustomElementClassLookup(FallbackElementClassLookup):
- """Element class lookup based on a subclass method.
+ """CustomElementClassLookup(self, fallback=None)
+ Element class lookup based on a subclass method.
You can inherit from this class and override the method::
@@ -240,6 +262,7 @@
self._lookup_function = _custom_class_lookup
def lookup(self, type, doc, namespace, name):
+ "lookup(self, type, doc, namespace, name)"
return None
cdef object _custom_class_lookup(state, _Document doc, xmlNode* c_node):
@@ -291,11 +314,14 @@
LOOKUP_ELEMENT_CLASS = function
def setElementClassLookup(ElementClassLookup lookup = None):
- "@deprecated: use ``set_element_class_lookup(lookup)`` instead"
+ ":deprecated: use ``set_element_class_lookup(lookup)`` instead"
set_element_class_lookup(lookup)
def set_element_class_lookup(ElementClassLookup lookup = None):
- "Set the global default element class lookup method."
+ """set_element_class_lookup(lookup = None)
+
+ Set the global default element class lookup method.
+ """
if lookup is None or lookup._lookup_function is NULL:
_setElementClassLookupFunction(NULL, None)
else:
Modified: lxml/trunk/src/lxml/docloader.pxi
==============================================================================
--- lxml/trunk/src/lxml/docloader.pxi (original)
+++ lxml/trunk/src/lxml/docloader.pxi Wed Feb 13 21:48:39 2008
@@ -15,7 +15,9 @@
cdef class Resolver:
"This is the base class of all resolvers."
def resolve(self, system_url, public_id, context):
- """Override this method to resolve an external source by
+ """resolve(self, system_url, public_id, context)
+
+ Override this method to resolve an external source by
``system_url`` and ``public_id``. The third argument is an
opaque context object.
@@ -24,7 +26,9 @@
return None
def resolve_empty(self, context):
- """Return an empty input document.
+ """resolve_empty(self, context)
+
+ Return an empty input document.
Pass context as parameter.
"""
@@ -34,7 +38,9 @@
return doc_ref
def resolve_string(self, string, context, *, base_url=None):
- """Return a parsable string as input document.
+ """resolve_string(self, string, context, base_url=None)
+
+ Return a parsable string as input document.
Pass data string and context as parameters.
@@ -49,7 +55,9 @@
return doc_ref
def resolve_filename(self, filename, context):
- """Return the name of a parsable file as input document.
+ """resolve_filename(self, filename, context)
+
+ Return the name of a parsable file as input document.
Pass filename and context as parameters.
"""
@@ -60,7 +68,9 @@
return doc_ref
def resolve_file(self, f, context):
- """Return an open file-like object as input document.
+ """resolve_file(self, f, context)
+
+ Return an open file-like object as input document.
Pass open file and context as parameters.
"""
@@ -83,7 +93,9 @@
self._default_resolver = default_resolver
def add(self, Resolver resolver not None):
- """Register a resolver.
+ """add(self, resolver)
+
+ Register a resolver.
For each requested entity, the 'resolve' method of the resolver will
be called and the result will be passed to the parser. If this method
@@ -94,6 +106,7 @@
self._resolvers.add(resolver)
def remove(self, resolver):
+ "remove(self, resolver)"
self._resolvers.discard(resolver)
cdef _ResolverRegistry _copy(self):
@@ -103,9 +116,11 @@
return registry
def copy(self):
+ "copy(self)"
return self._copy()
def resolve(self, system_url, public_id, context):
+ "resolve(self, system_url, public_id, context)"
for resolver in self._resolvers:
result = resolver.resolve(system_url, public_id, context)
if result is not None:
Modified: lxml/trunk/src/lxml/dtd.pxi
==============================================================================
--- lxml/trunk/src/lxml/dtd.pxi (original)
+++ lxml/trunk/src/lxml/dtd.pxi Wed Feb 13 21:48:39 2008
@@ -20,7 +20,8 @@
# DTD
cdef class DTD(_Validator):
- """A DTD validator.
+ """DTD(self, file=None, external_id=None)
+ A DTD validator.
Can load from filesystem directly given a filename or file-like object.
Alternatively, pass the keyword parameter ``external_id`` to load from a
@@ -56,7 +57,9 @@
tree.xmlFreeDtd(self._c_dtd)
def __call__(self, etree):
- """Validate doc using the DTD.
+ """__call__(self, etree)
+
+ Validate doc using the DTD.
Returns true if the document is valid, false if not.
"""
Modified: lxml/trunk/src/lxml/extensions.pxi
==============================================================================
--- lxml/trunk/src/lxml/extensions.pxi (original)
+++ lxml/trunk/src/lxml/extensions.pxi Wed Feb 13 21:48:39 2008
@@ -326,7 +326,9 @@
self._temp_refs.add((<_Element>o)._doc)
def Extension(module, function_mapping=None, *, ns=None):
- """Build a dictionary of extension functions from the functions
+ """Extension(module, function_mapping=None, ns=None)
+
+ Build a dictionary of extension functions from the functions
defined in a module or the methods of an object.
As second argument, you can pass an additional mapping of
Modified: lxml/trunk/src/lxml/html/usedoctest.py
==============================================================================
--- lxml/trunk/src/lxml/html/usedoctest.py (original)
+++ lxml/trunk/src/lxml/html/usedoctest.py Wed Feb 13 21:48:39 2008
@@ -1,3 +1,13 @@
+"""Doctest module for HTML comparison.
+
+Usage::
+
+ >>> import lxml.html.usedoctest
+ >>> # now do your HTML doctests ...
+
+See `lxml.doctestcompare`.
+"""
+
from lxml import doctestcompare
doctestcompare.temp_install(html=True, del_module=__name__)
Modified: lxml/trunk/src/lxml/iterparse.pxi
==============================================================================
--- lxml/trunk/src/lxml/iterparse.pxi (original)
+++ lxml/trunk/src/lxml/iterparse.pxi Wed Feb 13 21:48:39 2008
@@ -239,9 +239,12 @@
origEnd(ctxt, name)
cdef class iterparse(_BaseParser):
- """Incremental parser. Parses XML into a tree and generates tuples
- (event, element) in a SAX-like fashion. ``event`` is any of 'start',
- 'end', 'start-ns', 'end-ns'.
+ """iterparse(self, source, events=("end",), tag=None, attribute_defaults=False, dtd_validation=False, load_dtd=False, no_network=True, remove_blank_text=False, remove_comments=False, remove_pis=False, encoding=None, html=False, schema=None)
+ Incremental parser.
+
+ Parses XML into a tree and generates tuples (event, element) in a
+ SAX-like fashion. ``event`` is any of 'start', 'end', 'start-ns',
+ 'end-ns'.
For 'start' and 'end', ``element`` is the Element that the parser just
found opening or closing. For 'start-ns', it is a tuple (prefix, URI) of
@@ -262,17 +265,17 @@
attribute default values are requested.
Available boolean keyword arguments:
- * attribute_defaults - read default attributes from DTD
- * dtd_validation - validate (if DTD is available)
- * load_dtd - use DTD for parsing
- * no_network - prevent network access for related files
- * remove_blank_text - discard blank text nodes
- * remove_comments - discard comments
- * remove_pis - discard processing instructions
+ - attribute_defaults - read default attributes from DTD
+ - dtd_validation - validate (if DTD is available)
+ - load_dtd - use DTD for parsing
+ - no_network - prevent network access for related files
+ - remove_blank_text - discard blank text nodes
+ - remove_comments - discard comments
+ - remove_pis - discard processing instructions
Other keyword arguments:
- * encoding - override the document encoding
- * schema - an XMLSchema to validate against
+ - encoding - override the document encoding
+ - schema - an XMLSchema to validate against
"""
cdef object _source
cdef readonly object root
@@ -397,8 +400,10 @@
cdef class iterwalk:
- """A tree walker that generates events from an existing tree as if it was
- parsing XML data with ``iterparse()``.
+ """iterwalk(self, element_or_tree, events=("end",), tag=None)
+
+ A tree walker that generates events from an existing tree as if it
+ was parsing XML data with ``iterparse()``.
"""
cdef object _node_stack
cdef object _pop_node
Modified: lxml/trunk/src/lxml/lxml.etree.pyx
==============================================================================
--- lxml/trunk/src/lxml/lxml.etree.pyx (original)
+++ lxml/trunk/src/lxml/lxml.etree.pyx Wed Feb 13 21:48:39 2008
@@ -1,3 +1,9 @@
+"""The lxml.etree module implements the extended ElementTree API for
+XML.
+"""
+
+__docformat__ = "restructuredtext en"
+
cimport tree, python, config
from tree cimport xmlDoc, xmlNode, xmlAttr, xmlNs, _isElement, _getNs
from python cimport callable, _cstr, _isString
@@ -214,7 +220,9 @@
cdef class QName:
- """QName wrapper.
+ """QName(text_or_uri, tag=None)
+
+ QName wrapper.
Pass a tag name by itself or a namespace URI and a tag name to
create a qualified name. The ``text`` property holds the
@@ -510,7 +518,9 @@
cdef public class _Element [ type LxmlElementType, object LxmlElement ]:
- """Element class. References a document object and a libxml node.
+ """Element class.
+
+ References a document object and a libxml node.
By pointing to a Document instance, a reference is kept to
_Document as long as there is some pointer to a node in it.
@@ -522,7 +532,9 @@
cdef object _attrib
def _init(self):
- """Called after object initialisation. Custom subclasses may override
+ """_init(self)
+
+ Called after object initialisation. Custom subclasses may override
this if they recursively call _init() in the superclasses.
"""
@@ -537,7 +549,9 @@
# MANIPULATORS
def __setitem__(self, x, value):
- """Replaces the given subelement index or slice.
+ """__setitem__(self, x, value)
+
+ Replaces the given subelement index or slice.
"""
cdef xmlNode* c_node
cdef xmlNode* c_next
@@ -571,7 +585,9 @@
moveNodeToDocument(self._doc, c_node)
def __delitem__(self, x):
- """Deletes the given subelement or a slice.
+ """__delitem__(self, x)
+
+ Deletes the given subelement or a slice.
"""
cdef xmlNode* c_node
cdef xmlNode* c_next
@@ -599,9 +615,11 @@
_removeNode(self._doc, c_node)
def __deepcopy__(self, memo):
+ "__deepcopy__(self, memo)"
return self.__copy__()
def __copy__(self):
+ "__copy__(self)"
cdef xmlDoc* c_doc
cdef xmlNode* c_node
cdef _Document new_doc
@@ -619,17 +637,23 @@
return _elementFactory(new_doc, c_node)
def set(self, key, value):
- """Sets an element attribute.
+ """set(self, key, value)
+
+ Sets an element attribute.
"""
_setAttributeValue(self, key, value)
def append(self, _Element element not None):
- """Adds a subelement to the end of this element.
+ """append(self, element)
+
+ Adds a subelement to the end of this element.
"""
_appendChild(self, element)
def addnext(self, _Element element):
- """Adds the element as a following sibling directly after this
+ """addnext(self, element)
+
+ Adds the element as a following sibling directly after this
element.
This is normally used to set a processing instruction or comment after
@@ -644,7 +668,9 @@
_appendSibling(self, element)
def addprevious(self, _Element element):
- """Adds the element as a preceding sibling directly before this
+ """addprevious(self, element)
+
+ Adds the element as a preceding sibling directly before this
element.
This is normally used to set a processing instruction or comment
@@ -659,13 +685,17 @@
_prependSibling(self, element)
def extend(self, elements):
- """Extends the current children by the elements in the iterable.
+ """extend(self, elements)
+
+ Extends the current children by the elements in the iterable.
"""
for element in elements:
_appendChild(self, element)
def clear(self):
- """Resets an element. This function removes all subelements, clears
+ """clear(self)
+
+ Resets an element. This function removes all subelements, clears
all attributes and sets the text and tail properties to None.
"""
cdef xmlAttr* c_attr
@@ -693,7 +723,9 @@
c_node = c_node_next
def insert(self, index, _Element element not None):
- """Inserts a subelement at the given position in this element
+ """insert(self, index, element)
+
+ Inserts a subelement at the given position in this element
"""
cdef xmlNode* c_node
cdef xmlNode* c_next
@@ -707,7 +739,9 @@
moveNodeToDocument(self._doc, element._c_node)
def remove(self, _Element element not None):
- """Removes a matching subelement. Unlike the find methods, this
+ """remove(self, element)
+
+ Removes a matching subelement. Unlike the find methods, this
method compares elements based on identity, not on tag value
or contents.
"""
@@ -724,7 +758,9 @@
def replace(self, _Element old_element not None,
_Element new_element not None):
- """Replaces a subelement with the element passed as second argument.
+ """replace(self, old_element, new_element)
+
+ Replaces a subelement with the element passed as second argument.
"""
cdef xmlNode* c_old_node
cdef xmlNode* c_old_next
@@ -862,6 +898,7 @@
# ACCESSORS
def __repr__(self):
+ "__repr__(self)"
return "<Element %s at %x>" % (self.tag, id(self))
def __getitem__(self, x):
@@ -901,11 +938,14 @@
return _elementFactory(self._doc, c_node)
def __len__(self):
- """Returns the number of subelements.
+ """__len__(self)
+
+ Returns the number of subelements.
"""
return _countElements(self._c_node.children)
def __nonzero__(self):
+ "__nonzero__(self)"
import warnings
warnings.warn(
"The behavior of this method will change in future versions. "
@@ -916,6 +956,7 @@
return _hasChild(self._c_node)
def __contains__(self, element):
+ "__contains__(self, element)"
cdef xmlNode* c_node
if not isinstance(element, _Element):
return 0
@@ -923,13 +964,17 @@
return c_node is not NULL and c_node.parent is self._c_node
def __iter__(self):
+ "__iter__(self)"
return ElementChildIterator(self)
def __reversed__(self):
+ "__reversed__(self)"
return ElementChildIterator(self, reversed=True)
def index(self, _Element child not None, start=None, stop=None):
- """Find the position of the child within the parent.
+ """index(self, child, start=None, stop=None)
+
+ Find the position of the child within the parent.
This method is not part of the original ElementTree API.
"""
@@ -1012,40 +1057,52 @@
raise ValueError("list.index(x): x not in list")
def get(self, key, default=None):
- """Gets an element attribute.
+ """get(self, key, default=None)
+
+ Gets an element attribute.
"""
return _getAttributeValue(self, key, default)
def keys(self):
- """Gets a list of attribute names. The names are returned in an
+ """keys(self)
+
+ Gets a list of attribute names. The names are returned in an
arbitrary order (just like for an ordinary Python dictionary).
"""
return _collectAttributes(self._c_node, 1)
def values(self):
- """Gets element attribute values as a sequence of strings. The
+ """values(self)
+
+ Gets element attribute values as a sequence of strings. The
attributes are returned in an arbitrary order.
"""
return _collectAttributes(self._c_node, 2)
def items(self):
- """Gets element attributes, as a sequence. The attributes are returned in
+ """items(self)
+
+ Gets element attributes, as a sequence. The attributes are returned in
an arbitrary order.
"""
return _collectAttributes(self._c_node, 3)
def getchildren(self):
- """Returns all direct children. The elements are returned in document
+ """getchildren(self)
+
+ Returns all direct children. The elements are returned in document
order.
- @deprecated: Note that this method has been deprecated as of
- ElementTree 1.3 and lxml 2.0. New code should use
- ``list(element)`` or simply iterate over elements.
+ :deprecated: Note that this method has been deprecated as of
+ ElementTree 1.3 and lxml 2.0. New code should use
+ ``list(element)`` or simply iterate over elements.
"""
return _collectChildren(self)
def getparent(self):
- """Returns the parent of this element or None for the root element.
+ """getparent(self)
+
+ Returns the parent of this element or None for the root element.
"""
cdef xmlNode* c_node
c_node = _parentElement(self._c_node)
@@ -1055,7 +1112,9 @@
return _elementFactory(self._doc, c_node)
def getnext(self):
- """Returns the following sibling of this element or None.
+ """getnext(self)
+
+ Returns the following sibling of this element or None.
"""
cdef xmlNode* c_node
c_node = _nextElement(self._c_node)
@@ -1064,7 +1123,9 @@
return None
def getprevious(self):
- """Returns the preceding sibling of this element or None.
+ """getprevious(self)
+
+ Returns the preceding sibling of this element or None.
"""
cdef xmlNode* c_node
c_node = _previousElement(self._c_node)
@@ -1073,7 +1134,9 @@
return None
def itersiblings(self, tag=None, *, preceding=False):
- """Iterate over the following or preceding siblings of this element.
+ """itersiblings(self, tag=None, preceding=False)
+
+ Iterate over the following or preceding siblings of this element.
The direction is determined by the 'preceding' keyword which defaults
to False, i.e. forward iteration over the following siblings. The
@@ -1083,7 +1146,9 @@
return SiblingsIterator(self, tag, preceding=preceding)
def iterancestors(self, tag=None):
- """Iterate over the ancestors of this element (from parent to parent).
+ """iterancestors(self, tag=None)
+
+ Iterate over the ancestors of this element (from parent to parent).
The generated elements can be restricted to a specific tag name with
the 'tag' keyword.
@@ -1091,7 +1156,9 @@
return AncestorsIterator(self, tag)
def iterdescendants(self, tag=None):
- """Iterate over the descendants of this element in document order.
+ """iterdescendants(self, tag=None)
+
+ Iterate over the descendants of this element in document order.
As opposed to ``el.iter()``, this iterator does not yield the element
itself. The generated elements can be restricted to a specific tag
@@ -1100,7 +1167,9 @@
return ElementDepthFirstIterator(self, tag, inclusive=False)
def iterchildren(self, tag=None, *, reversed=False):
- """Iterate over the children of this element.
+ """iterchildren(self, tag=None, reversed=False)
+
+ Iterate over the children of this element.
As opposed to using normal iteration on this element, the generated
elements can be restricted to a specific tag name with the 'tag'
@@ -1109,7 +1178,9 @@
return ElementChildIterator(self, tag, reversed=reversed)
def getroottree(self):
- """Return an ElementTree for the root node of the document that
+ """getroottree(self)
+
+ Return an ElementTree for the root node of the document that
contains this element.
This is the same as following element.getparent() up the tree until it
@@ -1118,7 +1189,9 @@
return _elementTreeFactory(self._doc, None)
def getiterator(self, tag=None):
- """Returns a sequence or iterator of all elements in the subtree in
+ """getiterator(self, tag=None)
+
+ Returns a sequence or iterator of all elements in the subtree in
document order (depth first pre-order), starting with this
element.
@@ -1128,18 +1201,20 @@
You can also pass the Element, Comment, ProcessingInstruction and
Entity factory functions to look only for the specific element type.
- @deprecated: Note that this method is deprecated as of
- ElementTree 1.3 and lxml 2.0. It returns an iterator in lxml,
- which diverges from the original ElementTree behaviour. If
- you want an efficient iterator, use the ``element.iter()``
- method instead. You should only use this method in new code
- if you require backwards compatibility with older versions of
- lxml or ElementTree.
+ :deprecated: Note that this method is deprecated as of
+ ElementTree 1.3 and lxml 2.0. It returns an iterator in
+ lxml, which diverges from the original ElementTree
+ behaviour. If you want an efficient iterator, use the
+ ``element.iter()`` method instead. You should only use this
+ method in new code if you require backwards compatibility
+ with older versions of lxml or ElementTree.
"""
return ElementDepthFirstIterator(self, tag)
def iter(self, tag=None):
- """Iterate over all elements in the subtree in document order (depth
+ """iter(self, tag=None)
+
+ Iterate over all elements in the subtree in document order (depth
first pre-order), starting with this element.
Can be restricted to find only elements with a specific tag
@@ -1151,7 +1226,9 @@
return ElementDepthFirstIterator(self, tag)
def itertext(self, tag=None, *, with_tail=True):
- """Iterates over the text content of a subtree.
+ """itertext(self, tag=None, with_tail=True)
+
+ Iterates over the text content of a subtree.
You can pass the ``tag`` keyword argument to restrict text content to
a specific tag name.
@@ -1162,41 +1239,53 @@
return ElementTextIterator(self, tag, with_tail=with_tail)
def makeelement(self, _tag, attrib=None, nsmap=None, **_extra):
- """Creates a new element associated with the same document.
+ """makeelement(self, _tag, attrib=None, nsmap=None, **_extra)
+
+ Creates a new element associated with the same document.
"""
return _makeElement(_tag, NULL, self._doc, None, None, None,
attrib, nsmap, _extra)
def find(self, path):
- """Finds the first matching subelement, by tag name or path.
+ """find(self, path)
+
+ Finds the first matching subelement, by tag name or path.
"""
if isinstance(path, QName):
path = (<QName>path).text
return _elementpath.find(self, path)
def findtext(self, path, default=None):
- """Finds text for the first matching subelement, by tag name or path.
+ """findtext(self, path, default=None)
+
+ Finds text for the first matching subelement, by tag name or path.
"""
if isinstance(path, QName):
path = (<QName>path).text
return _elementpath.findtext(self, path, default)
def findall(self, path):
- """Finds all matching subelements, by tag name or path.
+ """findall(self, path)
+
+ Finds all matching subelements, by tag name or path.
"""
if isinstance(path, QName):
path = (<QName>path).text
return _elementpath.findall(self, path)
def iterfind(self, path):
- """Iterates over all matching subelements, by tag name or path.
+ """iterfind(self, path)
+
+ Iterates over all matching subelements, by tag name or path.
"""
if isinstance(path, QName):
path = (<QName>path).text
return _elementpath.iterfind(self, path)
def xpath(self, _path, *, namespaces=None, extensions=None, **_variables):
- """Evaluate an xpath expression using the element as context node.
+ """xpath(self, _path, namespaces=None, extensions=None, **_variables)
+
+ Evaluate an xpath expression using the element as context node.
"""
evaluator = XPathElementEvaluator(self, namespaces=namespaces,
extensions=extensions)
@@ -1260,15 +1349,19 @@
raise TypeError("this element does not have children or attributes")
def set(self, key, value):
+ "set(self, key, value)"
self._raiseImmutable()
def append(self, value):
+ "append(self, value)"
self._raiseImmutable()
def insert(self, index, value):
+ "insert(self, index, value)"
self._raiseImmutable()
def __setitem__(self, index, value):
+ "__setitem__(self, index, value)"
self._raiseImmutable()
property attrib:
@@ -1294,24 +1387,30 @@
# ACCESSORS
def __getitem__(self, x):
+ "__getitem__(self, x)"
if python.PySlice_Check(x):
return []
else:
raise IndexError("list index out of range")
def __len__(self):
+ "__len__(self)"
return 0
def get(self, key, default=None):
+ "get(self, key, default=None)"
return None
def keys(self):
+ "keys(self)"
return []
def items(self):
+ "items(self)"
return []
def values(self):
+ "values(self)"
return []
cdef class _Comment(__ContentOnlyElement):
@@ -1390,7 +1489,9 @@
"ElementTree not initialized, missing root"
def parse(self, source, _BaseParser parser=None):
- """Updates self with the content of source and returns its root
+ """parse(self, source, parser=None)
+
+ Updates self with the content of source and returns its root
"""
cdef _Document doc
doc = _parseDocument(source, parser)
@@ -1402,7 +1503,9 @@
return self._context_node
def _setroot(self, _Element root not None):
- """Relocate the ElementTree to a new root node.
+ """_setroot(self, root)
+
+ Relocate the ElementTree to a new root node.
"""
if root._c_node.type != tree.XML_ELEMENT_NODE:
raise TypeError("Only elements can be the root of an ElementTree")
@@ -1410,7 +1513,9 @@
self._doc = None
def getroot(self):
- """Gets the root element for this tree.
+ """getroot(self)
+
+ Gets the root element for this tree.
"""
return self._context_node
@@ -1445,7 +1550,10 @@
def write(self, file, *, encoding=None, method="xml",
pretty_print=False, xml_declaration=None, with_tail=True):
- """Write the tree to a file or file-like object.
+ """write(self, file, encoding=None, method="xml",
+ pretty_print=False, xml_declaration=None, with_tail=True)
+
+ Write the tree to a file or file-like object.
Defaults to ASCII encoding and writing a declaration as needed.
@@ -1470,7 +1578,9 @@
write_declaration, 1, pretty_print, with_tail)
def getpath(self, _Element element not None):
- """Returns a structural, absolute XPath expression to find that element.
+ """getpath(self, element)
+
+ Returns a structural, absolute XPath expression to find that element.
"""
cdef _Document doc
cdef xmlDoc* c_doc
@@ -1488,7 +1598,9 @@
return path
def getiterator(self, tag=None):
- """Returns a sequence or iterator of all elements in document order
+ """getiterator(self, tag=None)
+
+ Returns a sequence or iterator of all elements in document order
(depth first pre-order), starting with the root element.
Can be restricted to find only elements with a specific tag
@@ -1498,13 +1610,13 @@
You can also pass the Element, Comment, ProcessingInstruction and
Entity factory functions to look only for the specific element type.
- @deprecated: Note that this method is deprecated as of
- ElementTree 1.3 and lxml 2.0. It returns an iterator in lxml,
- which diverges from the original ElementTree behaviour. If
- you want an efficient iterator, use the ``tree.iter()`` method
- instead. You should only use this method in new code if you
- require backwards compatibility with older versions of lxml or
- ElementTree.
+ :deprecated: Note that this method is deprecated as of
+ ElementTree 1.3 and lxml 2.0. It returns an iterator in
+ lxml, which diverges from the original ElementTree
+ behaviour. If you want an efficient iterator, use the
+ ``tree.iter()`` method instead. You should only use this
+ method in new code if you require backwards compatibility
+ with older versions of lxml or ElementTree.
"""
root = self.getroot()
if root is None:
@@ -1512,7 +1624,9 @@
return root.getiterator(tag)
def iter(self, tag=None):
- """Creates an iterator for the root element. The iterator loops over
+ """iter(self, tag=None)
+
+ Creates an iterator for the root element. The iterator loops over
all elements in this tree, in document order.
"""
root = self.getroot()
@@ -1521,7 +1635,9 @@
return root.iter(tag)
def find(self, path):
- """Finds the first toplevel element with given tag. Same as
+ """find(self, path)
+
+ Finds the first toplevel element with given tag. Same as
``tree.getroot().find(path)``.
"""
self._assertHasRoot()
@@ -1531,7 +1647,9 @@
return root.find(path)
def findtext(self, path, default=None):
- """Finds the text for the first element matching the ElementPath
+ """findtext(self, path, default=None)
+
+ Finds the text for the first element matching the ElementPath
expression. Same as getroot().findtext(path)
"""
self._assertHasRoot()
@@ -1541,7 +1659,9 @@
return root.findtext(path, default)
def findall(self, path):
- """Finds all elements matching the ElementPath expression. Same as
+ """findall(self, path)
+
+ Finds all elements matching the ElementPath expression. Same as
getroot().findall(path).
"""
self._assertHasRoot()
@@ -1551,7 +1671,9 @@
return root.findall(path)
def iterfind(self, path):
- """Iterates over all elements matching the ElementPath expression.
+ """iterfind(self, path)
+
+ Iterates over all elements matching the ElementPath expression.
Same as getroot().iterfind(path).
"""
self._assertHasRoot()
@@ -1561,7 +1683,9 @@
return root.iterfind(path)
def xpath(self, _path, *, namespaces=None, extensions=None, **_variables):
- """XPath evaluate in context of document.
+ """xpath(self, _path, namespaces=None, extensions=None, **_variables)
+
+ XPath evaluate in context of document.
``namespaces`` is an optional dictionary with prefix to namespace URI
mappings, used by XPath. ``extensions`` defines additional extension
@@ -1582,7 +1706,9 @@
return evaluator.evaluate(_path, **_variables)
def xslt(self, _xslt, extensions=None, access_control=None, **_kw):
- """Transform this document using other document.
+ """xslt(self, _xslt, extensions=None, access_control=None, **_kw)
+
+ Transform this document using other document.
xslt is a tree that should be XSLT
keyword parameters are XSLT transformation parameters.
@@ -1599,7 +1725,9 @@
return style(self, **_kw)
def relaxng(self, relaxng):
- """Validate this document using other document.
+ """relaxng(self, relaxng)
+
+ Validate this document using other document.
The relaxng argument is a tree that should contain a Relax NG schema.
@@ -1615,7 +1743,9 @@
return schema.validate(self)
def xmlschema(self, xmlschema):
- """Validate this document using other document.
+ """xmlschema(self, xmlschema)
+
+ Validate this document using other document.
The xmlschema argument is a tree that should contain an XML Schema.
@@ -1631,7 +1761,9 @@
return schema.validate(self)
def xinclude(self):
- """Process the XInclude nodes in this document and include the
+ """xinclude(self)
+
+ Process the XInclude nodes in this document and include the
referenced XML fragments.
There is support for loading files through the file system, HTTP and
@@ -1645,7 +1777,9 @@
XInclude()(self._context_node)
def write_c14n(self, file):
- """C14N write of document. Always writes UTF-8.
+ """write_c14n(self, file)
+
+ C14N write of document. Always writes UTF-8.
"""
self._assertHasRoot()
_tofilelikeC14N(file, self._context_node)
@@ -1666,9 +1800,7 @@
cdef class _Attrib:
- """A proxy for the ``Element.attrib`` property.
-
- Behaves as a normal Python dict.
+ """A dict-like proxy for the ``Element.attrib`` property.
"""
cdef _Element _element
def __init__(self, _Element element not None):
@@ -1895,7 +2027,9 @@
return current_node
cdef class ElementChildIterator(_ElementIterator):
- "Iterates over the children of an element."
+ """ElementChildIterator(self, node, tag=None, reversed=False)
+ Iterates over the children of an element.
+ """
def __init__(self, _Element node not None, tag=None, *, reversed=False):
cdef xmlNode* c_node
self._initTagMatch(tag)
@@ -1916,7 +2050,8 @@
self._node = _elementFactory(node._doc, c_node)
cdef class SiblingsIterator(_ElementIterator):
- """Iterates over the siblings of an element.
+ """SiblingsIterator(self, node, tag=None, preceding=False)
+ Iterates over the siblings of an element.
You can pass the boolean keyword ``preceding`` to specify the direction.
"""
@@ -1929,18 +2064,24 @@
self._storeNext(node)
cdef class AncestorsIterator(_ElementIterator):
- "Iterates over the ancestors of an element (from parent to parent)."
+ """AncestorsIterator(self, node, tag=None)
+ Iterates over the ancestors of an element (from parent to parent).
+ """
def __init__(self, _Element node not None, tag=None):
self._initTagMatch(tag)
self._next_element = _parentElement
self._storeNext(node)
cdef class ElementDepthFirstIterator(_ElementTagMatcher):
- """Iterates over an element and its sub-elements in document order (depth
- first pre-order). Note that this also includes comments, entities and
- processing instructions. To filter them out, check if the ``tag``
- property of the returned element is a string (i.e. not None and not a
- factory function), or pass the ``Element`` factory for the ``tag`` keyword.
+ """ElementDepthFirstIterator(self, node, tag=None, inclusive=True)
+ Iterates over an element and its sub-elements in document order (depth
+ first pre-order).
+
+ Note that this also includes comments, entities and processing
+ instructions. To filter them out, check if the ``tag`` property
+ of the returned element is a string (i.e. not None and not a
+ factory function), or pass the ``Element`` factory for the ``tag``
+ keyword.
If the optional ``tag`` argument is not None, the iterator returns only
the elements that match the respective name and namespace.
@@ -2003,7 +2144,8 @@
return NULL
cdef class ElementTextIterator:
- """Iterates over the text content of a subtree.
+ """ElementTextIterator(self, element, tag=None, with_tail=True)
+ Iterates over the text content of a subtree.
You can pass the ``tag`` keyword argument to restrict text content to a
specific tag name.
@@ -2057,7 +2199,9 @@
# module-level API for ElementTree
def Element(_tag, attrib=None, nsmap=None, **_extra):
- """Element factory. This function returns an object implementing the
+ """Element(_tag, attrib=None, nsmap=None, **_extra)
+
+ Element factory. This function returns an object implementing the
Element interface.
"""
### also look at _Element.makeelement() and _BaseParser.makeelement() ###
@@ -2065,7 +2209,9 @@
attrib, nsmap, _extra)
def Comment(text=None):
- """Comment element factory. This factory function creates a special element that will
+ """Comment(text=None)
+
+ Comment element factory. This factory function creates a special element that will
be serialized as an XML comment.
"""
cdef _Document doc
@@ -2082,7 +2228,9 @@
return _elementFactory(doc, c_node)
def ProcessingInstruction(target, text=None):
- """ProcessingInstruction element factory. This factory function creates a
+ """ProcessingInstruction(target, text=None)
+
+ ProcessingInstruction element factory. This factory function creates a
special element that will be serialized as an XML processing instruction.
"""
cdef _Document doc
@@ -2102,7 +2250,9 @@
PI = ProcessingInstruction
def Entity(name):
- """Entity factory. This factory function creates a special element
+ """Entity(name)
+
+ Entity factory. This factory function creates a special element
that will be serialized as an XML entity reference or character
reference. Note, however, that entities will not be automatically
declared in the document. A document that uses entity references
@@ -2127,13 +2277,17 @@
def SubElement(_Element _parent not None, _tag,
attrib=None, nsmap=None, **_extra):
- """Subelement factory. This function creates an element instance, and
+ """SubElement(_parent, _tag, attrib=None, nsmap=None, **_extra)
+
+ Subelement factory. This function creates an element instance, and
appends it to an existing element.
"""
return _makeSubElement(_parent, _tag, None, None, attrib, nsmap, _extra)
def ElementTree(_Element element=None, *, file=None, _BaseParser parser=None):
- """ElementTree wrapper class.
+ """ElementTree(element=None, file=None, parser=None)
+
+ ElementTree wrapper class.
"""
cdef xmlNode* c_next
cdef xmlNode* c_node
@@ -2156,7 +2310,9 @@
return _elementTreeFactory(doc, element)
def HTML(text, _BaseParser parser=None, *, base_url=None):
- """Parses an HTML document from a string constant. This function can be used
+ """HTML(text, parser=None, base_url=None)
+
+ Parses an HTML document from a string constant. This function can be used
to embed "HTML literals" in Python code.
To override the parser with a different ``HTMLParser`` you can pass it to
@@ -2178,7 +2334,9 @@
return result_container.result
def XML(text, _BaseParser parser=None, *, base_url=None):
- """Parses an XML document from a string constant. This function can be used
+ """XML(text, parser=None, base_url=None)
+
+ Parses an XML document from a string constant. This function can be used
to embed "XML literals" in Python code, like in
>>> root = etree.XML("<root><test/></root>")
@@ -2202,7 +2360,9 @@
return result_container.result
def fromstring(text, _BaseParser parser=None, *, base_url=None):
- """Parses an XML document from a string.
+ """fromstring(text, parser=None, base_url=None)
+
+ Parses an XML document from a string.
To override the default parser with a different parser you can pass it to
the ``parser`` keyword argument.
@@ -2219,7 +2379,9 @@
return result_container.result
def fromstringlist(strings, _BaseParser parser=None):
- """Parses an XML document from a sequence of strings.
+ """fromstringlist(strings, parser=None)
+
+ Parses an XML document from a sequence of strings.
To override the default parser with a different parser you can pass it to
the ``parser`` keyword argument.
@@ -2233,19 +2395,26 @@
return parser.close()
def iselement(element):
- """Checks if an object appears to be a valid element object.
+ """iselement(element)
+
+ Checks if an object appears to be a valid element object.
"""
return isinstance(element, _Element)
def dump(_Element elem not None, *, pretty_print=True, with_tail=True):
- """Writes an element tree or element structure to sys.stdout. This function
+ """dump(elem, pretty_print=True, with_tail=True)
+
+ Writes an element tree or element structure to sys.stdout. This function
should be used for debugging only.
"""
_dumpToFile(sys.stdout, elem._c_node, pretty_print, with_tail)
def tostring(element_or_tree, *, encoding=None, method="xml",
xml_declaration=None, pretty_print=False, with_tail=True):
- """Serialize an element to an encoded string representation of its XML
+ """tostring(element_or_tree, encoding=None, method="xml",
+ xml_declaration=None, pretty_print=False, with_tail=True)
+
+ Serialize an element to an encoded string representation of its XML
tree.
Defaults to ASCII encoding without XML declaration. This behaviour can be
@@ -2292,7 +2461,9 @@
type(element_or_tree))
def tostringlist(element_or_tree, *args, **kwargs):
- """Serialize an element to an encoded string representation of its XML
+ """tostringlist(element_or_tree, *args, **kwargs)
+
+ Serialize an element to an encoded string representation of its XML
tree, stored in a list of partial strings.
This is purely for ElementTree 1.3 compatibility. The result is a
@@ -2302,7 +2473,10 @@
def tounicode(element_or_tree, *, method="xml", pretty_print=False,
with_tail=True):
- """Serialize an element to the Python unicode representation of its XML
+ """tounicode(element_or_tree, method="xml", pretty_print=False,
+ with_tail=True)
+
+ Serialize an element to the Python unicode representation of its XML
tree.
Note that the result does not carry an XML encoding declaration and is
@@ -2318,7 +2492,7 @@
by passing the boolean ``with_tail`` option. This has no impact
on the tail text of children, which will always be serialised.
- @deprecated: use ``tostring(el, encoding=unicode)`` instead.
+ :deprecated: use ``tostring(el, encoding=unicode)`` instead.
"""
if isinstance(element_or_tree, _Element):
return _tounicode(<_Element>element_or_tree, method, 0, pretty_print,
@@ -2331,7 +2505,9 @@
type(element_or_tree))
def parse(source, _BaseParser parser=None):
- """Return an ElementTree object loaded with source elements. If no parser
+ """parse(source, parser=None)
+
+ Return an ElementTree object loaded with source elements. If no parser
is provided as second argument, the default parser is used.
"""
cdef _Document doc
@@ -2366,8 +2542,10 @@
# Validation
class DocumentInvalid(LxmlError):
- """Validation error. Raised by all document validators when their
- ``assertValid(tree)`` method fails.
+ """Validation error.
+
+ Raised by all document validators when their ``assertValid(tree)``
+ method fails.
"""
pass
@@ -2375,28 +2553,39 @@
"Base class for XML validators."
cdef _ErrorLog _error_log
def __init__(self):
+ "__init__(self)"
self._error_log = _ErrorLog()
def validate(self, etree):
- """Validate the document using this schema.
+ """validate(self, etree)
+
+ Validate the document using this schema.
- Returns true if document is valid, false if not."""
+ Returns true if document is valid, false if not.
+ """
return self(etree)
def assertValid(self, etree):
- "Raises DocumentInvalid if the document does not comply with the schema."
+ """assertValid(self, etree)
+
+ Raises `DocumentInvalid` if the document does not comply with the schema.
+ """
if not self(etree):
raise DocumentInvalid(self._error_log._buildExceptionMessage(
"Document does not comply with schema"),
self._error_log)
def assert_(self, etree):
- "Raises AssertionError if the document does not comply with the schema."
+ """assert_(self, etree)
+
+ Raises `AssertionError` if the document does not comply with the schema.
+ """
if not self(etree):
raise AssertionError(self._error_log._buildExceptionMessage(
"Document does not comply with schema"))
property error_log:
+ "The log of validation errors and warnings."
def __get__(self):
return self._error_log.copy()
Modified: lxml/trunk/src/lxml/lxml.objectify.pyx
==============================================================================
--- lxml/trunk/src/lxml/lxml.objectify.pyx (original)
+++ lxml/trunk/src/lxml/lxml.objectify.pyx Wed Feb 13 21:48:39 2008
@@ -153,7 +153,9 @@
return _countSiblings(self._c_node)
def countchildren(self):
- """Return the number of children of this element, regardless of their
+ """countchildren(self)
+
+ Return the number of children of this element, regardless of their
name.
"""
# copied from etree
@@ -168,7 +170,9 @@
return c
def getchildren(self):
- """Returns a sequence of all direct children. The elements are
+ """getchildren(self)
+
+ Returns a sequence of all direct children. The elements are
returned in document order.
"""
cdef tree.xmlNode* c_node
@@ -217,7 +221,9 @@
self.remove(child)
def addattr(self, tag, value):
- """Add a child value to the element.
+ """addattr(self, tag, value)
+
+ Add a child value to the element.
As opposed to append(), it sets a data value, not an element.
"""
@@ -331,18 +337,21 @@
parent.remove(sibling)
def iterfind(self, path):
+ "iterfind(self, path)"
# Reimplementation of Element.iterfind() to make it work without child
# iteration.
xpath = etree.ETXPath(path)
return iter(xpath(self))
def findall(self, path):
+ "findall(self, path)"
# Reimplementation of Element.findall() to make it work without child
# iteration.
xpath = etree.ETXPath(path)
return xpath(self)
def find(self, path):
+ "find(self, path)"
# Reimplementation of Element.find() to make it work without child
# iteration.
result = self.findall(path)
@@ -354,6 +363,7 @@
return None
def findtext(self, path, default=None):
+ "findtext(self, path, default=None)"
# Reimplementation of Element.findtext() to make it work without child
# iteration.
result = self.find(path)
@@ -363,7 +373,9 @@
return default
def descendantpaths(self, prefix=None):
- """Returns a list of object path expressions for all descendants.
+ """descendantpaths(self, prefix=None)
+
+ Returns a list of object path expressions for all descendants.
"""
if prefix is not None and not python._isString(prefix):
prefix = '.'.join(prefix)
@@ -853,7 +865,8 @@
# Python type registry
cdef class PyType:
- """User defined type.
+ """PyType(self, name, type_check, type_class, stringify=None)
+ User defined type.
Named type that contains a type check function and a type class that
inherits from ObjectifiedDataElement. The type check must take a string
@@ -862,6 +875,7 @@
guessing.
Example::
+
PyType('int', int, MyIntClass).register()
Note that the order in which types are registered matters. The first
@@ -894,7 +908,9 @@
return "PyType(%s, %s)" % (self.name, self._type.__name__)
def register(self, before=None, after=None):
- """Register the type.
+ """register(self, before=None, after=None)
+
+ Register the type.
The additional keyword arguments 'before' and 'after' accept a
sequence of type names that must appear before/after the new type in
@@ -933,6 +949,7 @@
_SCHEMA_TYPE_DICT[xs_type] = self
def unregister(self):
+ "unregister(self)"
if _PYTYPE_DICT.get(self.name) is self:
del _PYTYPE_DICT[self.name]
for xs_type, pytype in _SCHEMA_TYPE_DICT.items():
@@ -989,7 +1006,9 @@
return _typename(obj)
def pytypename(obj):
- """Find the name of the corresponding PyType for a Python object.
+ """pytypename(obj)
+
+ Find the name of the corresponding PyType for a Python object.
"""
return _pytypename(obj)
@@ -1035,7 +1054,9 @@
_registerPyTypes()
def getRegisteredTypes():
- """Returns a list of the currently registered PyType objects.
+ """getRegisteredTypes()
+
+ Returns a list of the currently registered PyType objects.
To add a new type, retrieve this list and call unregister() for all
entries. Then add the new type at a suitable position (possibly replacing
@@ -1099,6 +1120,8 @@
cdef _ObjectifyElementMakerCaller NEW_ELEMENT_MAKER "PY_NEW" (object t)
cdef class ElementMaker:
+ """ElementMaker(self, namespace=None, nsmap=None, annotate=True, makeelement=None)
+ """
cdef object _makeelement
cdef object _namespace
cdef object _nsmap
@@ -1137,6 +1160,7 @@
cdef bint _annotate
def __call__(self, *children, **attrib):
+ "__call__(self, *children, **attrib)"
cdef _ObjectifyElementMakerCaller elementMaker
cdef python.PyObject* pytype
cdef _Element element
@@ -1214,14 +1238,18 @@
__RECURSIVE_STR = 0 # default: off
def enableRecursiveStr(on=True):
- """Enable a recursively generated tree representation for str(element),
+ """enableRecursiveStr(on=True)
+
+ Enable a recursively generated tree representation for str(element),
based on objectify.dump(element).
"""
global __RECURSIVE_STR
__RECURSIVE_STR = on
def dump(_Element element not None):
- """Return a recursively generated string representation of an element.
+ """dump(element)
+
+ Return a recursively generated string representation of an element.
"""
return _dump(element, 0)
@@ -1268,6 +1296,7 @@
copy_reg.pickle(ObjectifiedElement, reduceFunction, fromstring)
def pickleReduce(obj):
+ "pickleReduce(obj)"
return (fromstring, (etree.tostring(obj),))
_setupPickle(pickleReduce)
@@ -1277,7 +1306,8 @@
# Element class lookup
cdef class ObjectifyElementClassLookup(ElementClassLookup):
- """Element class lookup method that uses the objectify classes.
+ """ObjectifyElementClassLookup(self, tree_class=None, empty_data_class=None)
+ Element class lookup method that uses the objectify classes.
"""
cdef object empty_data_class
cdef object tree_class
@@ -1363,7 +1393,9 @@
def pyannotate(element_or_tree, *, ignore_old=False, ignore_xsi=False,
empty_pytype=None):
- """Recursively annotates the elements of an XML tree with 'pytype'
+ """pyannotate(element_or_tree, ignore_old=False, ignore_xsi=False, empty_pytype=None)
+
+ Recursively annotates the elements of an XML tree with 'pytype'
attributes.
If the 'ignore_old' keyword argument is True (the default), current 'pytype'
@@ -1384,7 +1416,9 @@
def xsiannotate(element_or_tree, *, ignore_old=False, ignore_pytype=False,
empty_type=None):
- """Recursively annotates the elements of an XML tree with 'xsi:type'
+ """xsiannotate(element_or_tree, ignore_old=False, ignore_pytype=False, empty_type=None)
+
+ Recursively annotates the elements of an XML tree with 'xsi:type'
attributes.
If the 'ignore_old' keyword argument is True (the default), current
@@ -1411,7 +1445,9 @@
def annotate(element_or_tree, *, ignore_old=True, ignore_xsi=False,
empty_pytype=None, empty_type=None, annotate_xsi=0,
annotate_pytype=1):
- """Recursively annotates the elements of an XML tree with 'xsi:type'
+ """annotate(element_or_tree, ignore_old=True, ignore_xsi=False, empty_pytype=None, empty_type=None, annotate_xsi=0, annotate_pytype=1)
+
+ Recursively annotates the elements of an XML tree with 'xsi:type'
and/or 'py:pytype' attributes.
If the 'ignore_old' keyword argument is True (the default), current
@@ -1597,7 +1633,9 @@
tree.END_FOR_EACH_ELEMENT_FROM(c_node)
def deannotate(element_or_tree, *, pytype=True, xsi=True):
- """Recursively de-annotate the elements of an XML tree by removing 'pytype'
+ """deannotate(element_or_tree, pytype=True, xsi=True)
+
+ Recursively de-annotate the elements of an XML tree by removing 'pytype'
and/or 'type' attributes.
If the 'pytype' keyword argument is True (the default), 'pytype' attributes
@@ -1642,11 +1680,13 @@
objectify_parser = __DEFAULT_PARSER
def setDefaultParser(new_parser = None):
- "This function is deprecated, use ``set_default_parser()`` instead."
+ ":deprecated: use ``set_default_parser()`` instead."
set_default_parser(new_parser)
def set_default_parser(new_parser = None):
- """Replace the default parser used by objectify's Element() and
+ """set_default_parser(new_parser = None)
+
+ Replace the default parser used by objectify's Element() and
fromstring() functions.
The new parser must be an etree.XMLParser.
@@ -1662,7 +1702,9 @@
raise TypeError("parser must inherit from lxml.etree.XMLParser")
def makeparser(**kw):
- """Create a new XML parser for objectify trees.
+ """makeparser(remove_blank_text=True, **kw)
+
+ Create a new XML parser for objectify trees.
You can pass all keyword arguments that are supported by
``etree.XMLParser()``. Note that this parser defaults to removing
@@ -1685,7 +1727,9 @@
_fromstring = etree.fromstring
def fromstring(xml, parser=None):
- """Objectify specific version of the lxml.etree fromstring() function
+ """fromstring(xml, parser=None)
+
+ Objectify specific version of the lxml.etree fromstring() function
that uses the objectify parser.
You can pass a different parser as second argument.
@@ -1700,7 +1744,9 @@
_parse = etree.parse
def parse(f, parser=None):
- """Parse a file or file-like object with the objectify parser.
+ """parse(f, parser=None)
+
+ Parse a file or file-like object with the objectify parser.
You can pass a different parser as second argument.
"""
@@ -1716,7 +1762,9 @@
E = ElementMaker()
def Element(_tag, attrib=None, nsmap=None, *, _pytype=None, **_attributes):
- """Objectify specific version of the lxml.etree Element() factory that
+ """Element(_tag, attrib=None, nsmap=None, _pytype=None, **_attributes)
+
+ Objectify specific version of the lxml.etree Element() factory that
always creates a structural (tree) element.
NOTE: requires parser based element class lookup activated in lxml.etree!
@@ -1734,7 +1782,9 @@
def DataElement(_value, attrib=None, nsmap=None, *, _pytype=None, _xsi=None,
**_attributes):
- """Create a new element from a Python value and XML attributes taken from
+ """DataElement(_value, attrib=None, nsmap=None, _pytype=None, _xsi=None, **_attributes)
+
+ Create a new element from a Python value and XML attributes taken from
keyword arguments or a dictionary passed as second argument.
Automatically adds a 'pytype' attribute for the Python type of the value,
Modified: lxml/trunk/src/lxml/nsclasses.pxi
==============================================================================
--- lxml/trunk/src/lxml/nsclasses.pxi (original)
+++ lxml/trunk/src/lxml/nsclasses.pxi Wed Feb 13 21:48:39 2008
@@ -28,10 +28,14 @@
self._entries = {}
def update(self, class_dict_iterable):
- """Forgivingly update the registry. If registered values do not match
- the required type for this registry, or if their name starts with '_',
- they will be silently discarded. This allows registrations at the
- module or class level using vars(), globals() etc."""
+ """update(self, class_dict_iterable)
+
+ Forgivingly update the registry.
+
+ If registered values do not match the required type for this
+ registry, or if their name starts with '_', they will be
+ silently discarded. This allows registrations at the module or
+ class level using vars(), globals() etc."""
if hasattr(class_dict_iterable, 'items'):
class_dict_iterable = class_dict_iterable.items()
for name, item in class_dict_iterable:
@@ -89,7 +93,9 @@
cdef class ElementNamespaceClassLookup(FallbackElementClassLookup):
- """Element class lookup scheme that searches the Element class in the
+ """ElementNamespaceClassLookup(self, fallback=None)
+
+ Element class lookup scheme that searches the Element class in the
Namespace registry.
"""
cdef object _namespace_registries
@@ -99,8 +105,11 @@
self._lookup_function = _find_nselement_class
def get_namespace(self, ns_uri):
- """Retrieve the namespace object associated with the given URI. Creates a
- new one if it does not yet exist."""
+ """get_namespace(self, ns_uri)
+
+ Retrieve the namespace object associated with the given URI.
+
+ Creates a new one if it does not yet exist."""
if ns_uri:
ns_utf = _utf8(ns_uri)
else:
@@ -156,9 +165,13 @@
__FUNCTION_NAMESPACE_REGISTRIES = {}
def FunctionNamespace(ns_uri):
- """Retrieve the function namespace object associated with the given
- URI. Creates a new one if it does not yet exist. A function namespace can
- only be used to register extension functions."""
+ """FunctionNamespace(ns_uri)
+
+ Retrieve the function namespace object associated with the given
+ URI.
+
+ Creates a new one if it does not yet exist. A function namespace
+ can only be used to register extension functions."""
if ns_uri:
ns_utf = _utf8(ns_uri)
else:
Modified: lxml/trunk/src/lxml/objectpath.pxi
==============================================================================
--- lxml/trunk/src/lxml/objectpath.pxi (original)
+++ lxml/trunk/src/lxml/objectpath.pxi Wed Feb 13 21:48:39 2008
@@ -8,7 +8,8 @@
cdef class ObjectPath:
- """Immutable object that represents a compiled object path.
+ """ObjectPath(path)
+ Immutable object that represents a compiled object path.
Example for a path: 'root.child[1].{other}child[25]'
"""
@@ -54,6 +55,7 @@
default, use_default)
def hasattr(self, _Element root not None):
+ "hasattr(self, root)"
try:
_findObjectPath(root, self._c_path, self._path_len, None, 0)
except AttributeError:
@@ -61,14 +63,18 @@
return True
def setattr(self, _Element root not None, value):
- """Set the value of the target element in a subtree.
+ """setattr(self, root, value)
+
+ Set the value of the target element in a subtree.
If any of the children on the path does not exist, it is created.
"""
_createObjectPath(root, self._c_path, self._path_len, 1, value)
def addattr(self, _Element root not None, value):
- """Append a value to the target element in a subtree.
+ """addattr(self, root, value)
+
+ Append a value to the target element in a subtree.
If any of the children on the path does not exist, it is created.
"""
Modified: lxml/trunk/src/lxml/parser.pxi
==============================================================================
--- lxml/trunk/src/lxml/parser.pxi (original)
+++ lxml/trunk/src/lxml/parser.pxi Wed Feb 13 21:48:39 2008
@@ -668,6 +668,7 @@
return context._error_log.copy()
property resolvers:
+ "The custom resolver registry of this parser."
def __get__(self):
return self._resolvers
@@ -681,7 +682,9 @@
self.set_element_class_lookup(lookup)
def set_element_class_lookup(self, ElementClassLookup lookup = None):
- """Set a lookup scheme for element classes generated from this parser.
+ """set_element_class_lookup(self, lookup = None)
+
+ Set a lookup scheme for element classes generated from this parser.
Reset it by passing None or nothing.
"""
@@ -702,11 +705,16 @@
return parser
def copy(self):
- "Create a new parser with the same configuration."
+ """copy(self)
+
+ Create a new parser with the same configuration.
+ """
return self._copy()
def makeelement(self, _tag, attrib=None, nsmap=None, **_extra):
- """Creates a new element associated with this parser.
+ """makeelement(self, _tag, attrib=None, nsmap=None, **_extra)
+
+ Creates a new element associated with this parser.
"""
return _makeElement(_tag, NULL, None, self, None, None,
attrib, nsmap, _extra)
@@ -861,7 +869,9 @@
return context._error_log.copy()
def feed(self, data):
- """Feeds data to the parser. The argument should be an 8-bit string
+ """feed(self, data)
+
+ Feeds data to the parser. The argument should be an 8-bit string
buffer containing encoded data, although Unicode is supported as long
as both string types are not mixed.
@@ -942,7 +952,9 @@
context.cleanup()
def close(self):
- """Terminates feeding data to this parser. This tells the parser to
+ """close(self)
+
+ Terminates feeding data to this parser. This tells the parser to
process any remaining data in the feed buffer, and then returns the
root Element of the tree that was parsed.
@@ -1303,7 +1315,8 @@
############################################################
cdef class TreeBuilder(_SaxParserTarget):
- """Parser target that builds a tree.
+ """TreeBuilder(self, element_factory=None, parser=None)
+ Parser target that builds a tree.
The final tree is returned by the ``close()`` method.
"""
@@ -1343,7 +1356,9 @@
# Python level event handlers
def close(self):
- """Flushes the builder buffers, and returns the toplevel document
+ """close(self)
+
+ Flushes the builder buffers, and returns the toplevel document
element.
"""
assert python.PyList_GET_SIZE(self._element_stack) == 0, "missing end tags"
@@ -1351,19 +1366,27 @@
return self._last
def data(self, data):
- """Adds text to the current element. The value should be either an
+ """data(self, data)
+
+ Adds text to the current element. The value should be either an
8-bit string containing ASCII text, or a Unicode string.
"""
self._handleSaxData(data)
def start(self, tag, attrs, nsmap=None):
- "Opens a new element."
+ """start(self, tag, attrs, nsmap=None)
+
+ Opens a new element.
+ """
if nsmap is None:
nsmap = EMPTY_READ_ONLY_DICT
return self._handleSaxStart(tag, attrs, nsmap)
def end(self, tag):
- "Closes the current element."
+ """end(self, tag)
+
+ Closes the current element.
+ """
element = self._handleSaxEnd(tag)
assert self._last.tag == tag,\
"end tag mismatch (expected %s, got %s)" % (
@@ -1371,9 +1394,13 @@
return element
def pi(self, target, data):
+ """pi(self, target, data)
+ """
return self._handleSaxPi(target, data)
def comment(self, comment):
+ """comment(self, comment)
+ """
return self._handleSaxComment(comment)
# internal SAX event handlers
@@ -1432,33 +1459,36 @@
)
cdef class XMLParser(_FeedParser):
- """The XML parser. Parsers can be supplied as additional argument to
- various parse functions of the lxml API. A default parser is always
- available and can be replaced by a call to the global function
- 'set_default_parser'. New parsers can be created at any time without a
- major run-time overhead.
+ """XMLParser(self, attribute_defaults=False, dtd_validation=False, load_dtd=False, no_network=True, ns_clean=False, recover=False, remove_blank_text=False, compact=True, resolve_entities=True, remove_comments=False, remove_pis=False, target=None, encoding=None, schema=None)
+ The XML parser.
+
+ Parsers can be supplied as additional argument to various parse
+ functions of the lxml API. A default parser is always available
+ and can be replaced by a call to the global function
+ 'set_default_parser'. New parsers can be created at any time
+ without a major run-time overhead.
The keyword arguments in the constructor are mainly based on the libxml2
parser configuration. A DTD will also be loaded if validation or
attribute default values are requested.
Available boolean keyword arguments:
- * attribute_defaults - read default attributes from DTD
- * dtd_validation - validate (if DTD is available)
- * load_dtd - use DTD for parsing
- * no_network - prevent network access for related files (default: True)
- * ns_clean - clean up redundant namespace declarations
- * recover - try hard to parse through broken XML
- * remove_blank_text - discard blank text nodes
- * remove_comments - discard comments
- * remove_pis - discard processing instructions
- * compact - safe memory for short text content (default: True)
- * resolve_entities - replace entities by their text value (default: True)
+ - attribute_defaults - read default attributes from DTD
+ - dtd_validation - validate (if DTD is available)
+ - load_dtd - use DTD for parsing
+ - no_network - prevent network access for related files (default: True)
+ - ns_clean - clean up redundant namespace declarations
+ - recover - try hard to parse through broken XML
+ - remove_blank_text - discard blank text nodes
+ - remove_comments - discard comments
+ - remove_pis - discard processing instructions
+ - compact - save memory for short text content (default: True)
+ - resolve_entities - replace entities by their text value (default: True)
Other keyword arguments:
- * encoding - override the document encoding
- * target - a parser target object that will receive the parse events
- * schema - an XMLSchema to validate against
+ - encoding - override the document encoding
+ - target - a parser target object that will receive the parse events
+ - schema - an XMLSchema to validate against
Note that you should avoid sharing parsers between threads. While this is
not harmful, it is more efficient to use separate parsers. This does not
@@ -1498,8 +1528,10 @@
target, None, encoding)
cdef class ETCompatXMLParser(XMLParser):
- """An XML parser with an ElementTree compatible default setup. See the
- XMLParser class for details.
+ """ETCompatXMLParser(self, attribute_defaults=False, dtd_validation=False, load_dtd=False, no_network=True, ns_clean=False, recover=False, remove_blank_text=False, compact=True, resolve_entities=True, remove_comments=True, remove_pis=True, target=None, encoding=None, schema=None)
+ An XML parser with an ElementTree compatible default setup.
+
+ See the XMLParser class for details.
This parser has ``remove_comments`` and ``remove_pis`` enabled by default
and thus ignores comments and processing instructions.
@@ -1532,15 +1564,17 @@
__GLOBAL_PARSER_CONTEXT.setDefaultParser(__DEFAULT_XML_PARSER)
def setDefaultParser(parser=None):
- "@deprecated: please use set_default_parser instead."
+ ":deprecated: please use set_default_parser instead."
set_default_parser(parser)
def getDefaultParser():
- "@deprecated: please use get_default_parser instead."
+ ":deprecated: please use get_default_parser instead."
return get_default_parser()
def set_default_parser(_BaseParser parser=None):
- """Set a default parser for the current thread. This parser is used
+ """set_default_parser(parser=None)
+
+ Set a default parser for the current thread. This parser is used
globally whenever no parser is supplied to the various parse functions of
the lxml API. If this function is called without a parser (or if it is
None), the default parser is reset to the original configuration.
@@ -1554,6 +1588,7 @@
__GLOBAL_PARSER_CONTEXT.setDefaultParser(parser)
def get_default_parser():
+ "get_default_parser()"
return __GLOBAL_PARSER_CONTEXT.getDefaultParser()
############################################################
@@ -1568,22 +1603,26 @@
)
cdef class HTMLParser(_FeedParser):
- """The HTML parser. This parser allows reading HTML into a normal XML
- tree. By default, it can read broken (non well-formed) HTML, depending on
- the capabilities of libxml2. Use the 'recover' option to switch this off.
+ """HTMLParser(self, recover=True, no_network=True, remove_blank_text=False, compact=True, remove_comments=False, remove_pis=False, target=None, encoding=None, schema=None)
+ The HTML parser.
+
+ This parser allows reading HTML into a normal XML tree. By
+ default, it can read broken (non well-formed) HTML, depending on
+ the capabilities of libxml2. Use the 'recover' option to switch
+ this off.
Available boolean keyword arguments:
- * recover - try hard to parse through broken HTML (default: True)
- * no_network - prevent network access for related files (default: True)
- * remove_blank_text - discard empty text nodes
- * remove_comments - discard comments
- * remove_pis - discard processing instructions
- * compact - safe memory for short text content (default: True)
+ - recover - try hard to parse through broken HTML (default: True)
+ - no_network - prevent network access for related files (default: True)
+ - remove_blank_text - discard empty text nodes
+ - remove_comments - discard comments
+ - remove_pis - discard processing instructions
+ - compact - save memory for short text content (default: True)
Other keyword arguments:
- * encoding - override the document encoding
- * target - a parser target object that will receive the parse events
- * schema - an XMLSchema to validate against
+ - encoding - override the document encoding
+ - target - a parser target object that will receive the parse events
+ - schema - an XMLSchema to validate against
Note that you should avoid sharing parsers between threads for performance
reasons.
Modified: lxml/trunk/src/lxml/relaxng.pxi
==============================================================================
--- lxml/trunk/src/lxml/relaxng.pxi (original)
+++ lxml/trunk/src/lxml/relaxng.pxi Wed Feb 13 21:48:39 2008
@@ -20,7 +20,8 @@
# RelaxNG
cdef class RelaxNG(_Validator):
- """Turn a document into a Relax NG validator.
+ """RelaxNG(self, etree=None, file=None)
+ Turn a document into a Relax NG validator.
Either pass a schema as Element or ElementTree, or pass a file or
filename through the ``file`` keyword argument.
@@ -91,7 +92,9 @@
relaxng.xmlRelaxNGFree(self._c_schema)
def __call__(self, etree):
- """Validate doc using Relax NG.
+ """__call__(self, etree)
+
+ Validate doc using Relax NG.
Returns true if document is valid, false if not."""
cdef _Document doc
Modified: lxml/trunk/src/lxml/schematron.pxi
==============================================================================
--- lxml/trunk/src/lxml/schematron.pxi (original)
+++ lxml/trunk/src/lxml/schematron.pxi Wed Feb 13 21:48:39 2008
@@ -66,14 +66,15 @@
# Schematron
cdef class Schematron(_Validator):
- """A Schematron validator.
+ """Schematron(self, etree=None, file=None)
+ A Schematron validator.
Pass a root Element or an ElementTree to turn it into a validator.
Alternatively, pass a filename as keyword argument 'file' to parse from
the file system.
"""
cdef schematron.xmlSchematron* _c_schema
- def __init__(self, etree=None, file=None):
+ def __init__(self, etree=None, *, file=None):
cdef _Document doc
cdef _Element root_node
cdef xmlNode* c_node
@@ -120,7 +121,9 @@
schematron.xmlSchematronFree(self._c_schema)
def __call__(self, etree):
- """Validate doc using Schematron.
+ """__call__(self, etree)
+
+ Validate doc using Schematron.
Returns true if document is valid, false if not."""
cdef _Document doc
Modified: lxml/trunk/src/lxml/usedoctest.py
==============================================================================
--- lxml/trunk/src/lxml/usedoctest.py (original)
+++ lxml/trunk/src/lxml/usedoctest.py Wed Feb 13 21:48:39 2008
@@ -1,3 +1,13 @@
+"""Doctest module for XML comparison.
+
+Usage::
+
+ >>> import lxml.usedoctest
+ >>> # now do your XML doctests ...
+
+See `lxml.doctestcompare`
+"""
+
from lxml import doctestcompare
doctestcompare.temp_install(del_module=__name__)
Modified: lxml/trunk/src/lxml/xinclude.pxi
==============================================================================
--- lxml/trunk/src/lxml/xinclude.pxi (original)
+++ lxml/trunk/src/lxml/xinclude.pxi Wed Feb 13 21:48:39 2008
@@ -8,7 +8,8 @@
pass
cdef class XInclude:
- """XInclude processor.
+ """XInclude(self)
+ XInclude processor.
Create an instance and call it on an Element to run XInclude
processing.
@@ -22,6 +23,7 @@
return self._error_log.copy()
def __call__(self, _Element node not None):
+ "__call__(self, node)"
# We cannot pass the XML_PARSE_NOXINCNODE option as this would free
# the XInclude nodes - there may still be Python references to them!
# Therefore, we allow XInclude nodes to be converted to
Modified: lxml/trunk/src/lxml/xmlerror.pxi
==============================================================================
--- lxml/trunk/src/lxml/xmlerror.pxi (original)
+++ lxml/trunk/src/lxml/xmlerror.pxi Wed Feb 13 21:48:39 2008
@@ -5,7 +5,9 @@
# module level API functions
def clear_error_log():
- """Clear the global error log. Note that this log is already bound to a
+ """clear_error_log()
+
+ Clear the global error log. Note that this log is already bound to a
fixed size.
"""
__GLOBAL_ERROR_LOG.clear()
@@ -14,7 +16,7 @@
"""Clear the global error log. Note that this log is already bound to a
fixed size.
- @deprecated: use ``clear_error_log()`` instead.
+ :deprecated: use ``clear_error_log()`` instead.
"""
__GLOBAL_ERROR_LOG.clear()
@@ -233,8 +235,10 @@
return _ListErrorLog(filtered, None, None)
def filter_types(self, types):
- """Filter the errors by the given types and return a new error log
- containing the matches.
+ """filter_types(self, types)
+
+ Filter the errors by the given types and return a new error
+ log containing the matches.
"""
cdef _LogEntry entry
if not python.PySequence_Check(types):
@@ -246,8 +250,10 @@
return _ListErrorLog(filtered, None, None)
def filter_levels(self, levels):
- """Filter the errors by the given error levels and return a new error
- log containing the matches.
+ """filter_levels(self, levels)
+
+ Filter the errors by the given error levels and return a new
+ error log containing the matches.
"""
cdef _LogEntry entry
if not python.PySequence_Check(levels):
@@ -259,7 +265,10 @@
return _ListErrorLog(filtered, None, None)
def filter_from_level(self, level):
- "Return a log with all messages of the requested level of worse."
+ """filter_from_level(self, level)
+
+ Return a log with all messages of the requested level or worse.
+ """
cdef _LogEntry entry
filtered = []
for entry in self._entries:
@@ -268,15 +277,24 @@
return _ListErrorLog(filtered, None, None)
def filter_from_fatals(self):
- "Convenience method to get all fatal error messages."
+ """filter_from_fatals(self)
+
+ Convenience method to get all fatal error messages.
+ """
return self.filter_from_level(ErrorLevels.FATAL)
def filter_from_errors(self):
- "Convenience method to get all error messages or worse."
+ """filter_from_errors(self)
+
+ Convenience method to get all error messages or worse.
+ """
return self.filter_from_level(ErrorLevels.ERROR)
def filter_from_warnings(self):
- "Convenience method to get all warnings or worse."
+ """filter_from_warnings(self)
+
+ Convenience method to get all warnings or worse.
+ """
return self.filter_from_level(ErrorLevels.WARNING)
cdef class _ErrorLog(_ListErrorLog):
@@ -331,7 +349,8 @@
python.PyList_Append(entries, entry)
cdef class PyErrorLog(_BaseErrorLog):
- """A global error log that connects to the Python stdlib logging package.
+ """PyErrorLog(self, logger_name=None)
+ A global error log that connects to the Python stdlib logging package.
The constructor accepts an optional logger name.
@@ -395,12 +414,14 @@
Note that this disables access to the global error log from exceptions.
Parsers, XSLT etc. will continue to provide their normal local error log.
- @deprecated: use ``use_global_python_log()`` instead.
+ :deprecated: use ``use_global_python_log()`` instead.
"""
use_global_python_log(log)
def use_global_python_log(PyErrorLog log not None):
- """Replace the global error log by an etree.PyErrorLog that uses the
+ """use_global_python_log(log)
+
+ Replace the global error log by an etree.PyErrorLog that uses the
standard Python logging package.
Note that this disables access to the global error log from exceptions.
Modified: lxml/trunk/src/lxml/xmlid.pxi
==============================================================================
--- lxml/trunk/src/lxml/xmlid.pxi (original)
+++ lxml/trunk/src/lxml/xmlid.pxi Wed Feb 13 21:48:39 2008
@@ -1,7 +1,9 @@
cdef object _find_id_attributes
def XMLID(text):
- """Parse the text and return a tuple (root node, ID dictionary). The root
+ """XMLID(text)
+
+ Parse the text and return a tuple (root node, ID dictionary). The root
node is the same as returned by the XML() function. The dictionary
contains string-element pairs. The dictionary keys are the values of 'id'
attributes. The elements referenced by the ID are stored as dictionary
@@ -19,7 +21,9 @@
return (root, dic)
def XMLDTDID(text):
- """Parse the text and return a tuple (root node, ID dictionary). The root
+ """XMLDTDID(text)
+
+ Parse the text and return a tuple (root node, ID dictionary). The root
node is the same as returned by the XML() function. The dictionary
contains string-element pairs. The dictionary keys are the values of ID
attributes as defined by the DTD. The elements referenced by the ID are
@@ -37,7 +41,9 @@
return (root, _IDDict(root))
def parseid(source, parser=None):
- """Parses the source into a tuple containing an ElementTree object and an
+ """parseid(source, parser=None)
+
+ Parses the source into a tuple containing an ElementTree object and an
ID dictionary. If no parser is provided as second argument, the default
parser is used.
@@ -49,7 +55,8 @@
return (_elementTreeFactory(doc, None), _IDDict(doc))
cdef class _IDDict:
- """A dictionary-like proxy class that mapps ID attributes to elements.
+ """IDDict(self, etree)
+ A dictionary-like proxy class that maps ID attributes to elements.
The dictionary must be instantiated with the root element of a parsed XML
document, otherwise the behaviour is undefined. Elements and XML trees
Modified: lxml/trunk/src/lxml/xmlschema.pxi
==============================================================================
--- lxml/trunk/src/lxml/xmlschema.pxi (original)
+++ lxml/trunk/src/lxml/xmlschema.pxi Wed Feb 13 21:48:39 2008
@@ -20,7 +20,8 @@
# XMLSchema
cdef class XMLSchema(_Validator):
- """Turn a document into an XML Schema validator.
+ """XMLSchema(self, etree=None, file=None)
+ Turn a document into an XML Schema validator.
Either pass a schema as Element or ElementTree, or pass a file or
filename through the ``file`` keyword argument.
@@ -83,7 +84,9 @@
xmlschema.xmlSchemaFree(self._c_schema)
def __call__(self, etree):
- """Validate doc using XML Schema.
+ """__call__(self, etree)
+
+ Validate doc using XML Schema.
Returns true if document is valid, false if not.
"""
Modified: lxml/trunk/src/lxml/xpath.pxi
==============================================================================
--- lxml/trunk/src/lxml/xpath.pxi (original)
+++ lxml/trunk/src/lxml/xpath.pxi Wed Feb 13 21:48:39 2008
@@ -127,13 +127,17 @@
self._context.set_context(xpathCtxt)
def evaluate(self, _eval_arg, **_variables):
- """Evaluate an XPath expression.
+ """evaluate(self, _eval_arg, **_variables)
+
+ Evaluate an XPath expression.
Instead of calling this method, you can also call the evaluator object
itself.
Variables may be provided as keyword arguments. Note that namespaces
are currently not supported for variables.
+
+ :deprecated: call the object, not its method.
"""
return self(_eval_arg, **_variables)
@@ -207,7 +211,8 @@
cdef class XPathElementEvaluator(_XPathEvaluatorBase):
- """Create an XPath evaluator for an element.
+ """XPathElementEvaluator(self, element, namespaces=None, extensions=None, regexp=True)
+ Create an XPath evaluator for an element.
Absolute XPath expressions (starting with '/') will be evaluated against
the ElementTree as returned by getroottree().
@@ -232,17 +237,34 @@
def registerNamespace(self, prefix, uri):
"""Register a namespace with the XPath context.
+
+ :deprecated: use ``register_namespace()`` instead
+ """
+ self._context.addNamespace(prefix, uri)
+
+ def register_namespace(self, prefix, uri):
+ """Register a namespace with the XPath context.
"""
self._context.addNamespace(prefix, uri)
def registerNamespaces(self, namespaces):
"""Register a prefix -> uri dict.
+
+ :deprecated: use ``register_namespaces()`` instead
+ """
+ for prefix, uri in namespaces.items():
+ self._context.addNamespace(prefix, uri)
+
+ def register_namespaces(self, namespaces):
+ """Register a prefix -> uri dict.
"""
for prefix, uri in namespaces.items():
self._context.addNamespace(prefix, uri)
def __call__(self, _path, **_variables):
- """Evaluate an XPath expression on the document.
+ """__call__(self, _path, **_variables)
+
+ Evaluate an XPath expression on the document.
Variables may be provided as keyword arguments. Note that namespaces
are currently not supported for variables.
@@ -276,7 +298,8 @@
cdef class XPathDocumentEvaluator(XPathElementEvaluator):
- """Create an XPath evaluator for an ElementTree.
+ """XPathDocumentEvaluator(self, etree, namespaces=None, extensions=None, regexp=True)
+ Create an XPath evaluator for an ElementTree.
Additional namespace declarations can be passed with the 'namespace'
keyword argument. EXSLT regular expression support can be disabled with
@@ -289,7 +312,9 @@
extensions=extensions, regexp=regexp)
def __call__(self, _path, **_variables):
- """Evaluate an XPath expression on the document.
+ """__call__(self, _path, **_variables)
+
+ Evaluate an XPath expression on the document.
Variables may be provided as keyword arguments. Note that namespaces
are currently not supported for variables.
@@ -327,7 +352,9 @@
def XPathEvaluator(etree_or_element, *, namespaces=None, extensions=None,
regexp=True):
- """Creates an XPath evaluator for an ElementTree or an Element.
+ """XPathEvaluator(etree_or_element, namespaces=None, extensions=None, regexp=True)
+
+ Creates an XPath evaluator for an ElementTree or an Element.
The resulting object can be called with an XPath expression as argument
and XPath variables provided as keyword arguments.
@@ -347,8 +374,8 @@
cdef class XPath(_XPathEvaluatorBase):
- """A compiled XPath expression that can be called on Elements and
- ElementTrees.
+ """XPath(self, path, namespaces=None, extensions=None, regexp=True)
+ A compiled XPath expression that can be called on Elements and ElementTrees.
Besides the XPath expression, you can pass prefix-namespace mappings and
extension functions to the constructor through the keyword arguments
@@ -374,6 +401,7 @@
self._raise_parse_error()
def __call__(self, _etree_or_element, **_variables):
+ "__call__(self, _etree_or_element, **_variables)"
cdef xpath.xmlXPathObject* xpathObj
cdef _Document document
cdef _Element element
@@ -414,8 +442,8 @@
_find_namespaces = re.compile('({[^}]+})').findall
cdef class ETXPath(XPath):
- """Special XPath class that supports the ElementTree {uri} notation for
- namespaces.
+ """ETXPath(self, path, extensions=None, regexp=True)
+ Special XPath class that supports the ElementTree {uri} notation for namespaces.
Note that this class does not accept the ``namespace`` keyword
argument. All namespaces must be passed as part of the path string.
Modified: lxml/trunk/src/lxml/xslt.pxi
==============================================================================
--- lxml/trunk/src/lxml/xslt.pxi (original)
+++ lxml/trunk/src/lxml/xslt.pxi Wed Feb 13 21:48:39 2008
@@ -167,16 +167,18 @@
# XSLT file/network access control
cdef class XSLTAccessControl:
- """Access control for XSLT: reading/writing files, directories and network
- I/O. Access to a type of resource is granted or denied by passing any of
- the following keyword arguments. All of them default to True to allow
- access.
-
- * read_file
- * write_file
- * create_dir
- * read_network
- * write_network
+ """XSLTAccessControl(self, read_file=True, write_file=True, create_dir=True, read_network=True, write_network=True)
+
+ Access control for XSLT: reading/writing files, directories and
+ network I/O. Access to a type of resource is granted or denied by
+ passing any of the following boolean keyword arguments. All of
+ them default to True to allow access.
+
+ - read_file
+ - write_file
+ - create_dir
+ - read_network
+ - write_network
"""
cdef xslt.xsltSecurityPrefs* _prefs
def __init__(self, *, read_file=True, write_file=True, create_dir=True,
@@ -252,16 +254,25 @@
cdef class XSLT:
- """Turn a document into an XSLT object.
+ """XSLT(self, xslt_input, extensions=None, regexp=True, access_control=None)
+
+ Turn an XSL document into an XSLT object.
+
+ Calling this object on a tree or Element will execute the XSLT::
+
+ >>> transform = etree.XSLT(xsl_tree)
+ >>> result = transform(xml_tree)
Keyword arguments of the constructor:
- * regexp - enable exslt regular expression support in XPath (default: True)
- * access_control - access restrictions for network or file system
+ - regexp: enable exslt regular expression support in XPath
+ (default: True)
+ - access_control: access restrictions for network or file
+ system (see `XSLTAccessControl`)
- Keyword arguments of the XSLT run:
- * profile_run - enable XSLT profiling
+ Keyword arguments of the XSLT call:
+ - profile_run: enable XSLT profiling (default: False)
- Other keyword arguments are passed to the stylesheet.
+ Other keyword arguments of the call are passed to the stylesheet.
"""
cdef _XSLTContext _context
cdef xslt.xsltStylesheet* _c_style
@@ -328,14 +339,22 @@
xslt.xsltFreeStylesheet(self._c_style)
property error_log:
+ "The log of errors and warnings of an XSLT execution."
def __get__(self):
return self._error_log.copy()
def apply(self, _input, *, profile_run=False, **_kw):
+ """apply(self, _input, profile_run=False, **_kw)
+
+ :deprecated: call the object, not this method."""
return self(_input, profile_run=profile_run, **_kw)
def tostring(self, _ElementTree result_tree):
- """Save result doc to string based on stylesheet output method.
+ """tostring(self, result_tree)
+
+ Save result doc to string based on stylesheet output method.
+
+ :deprecated: use str(result_tree) instead.
"""
return str(result_tree)
@@ -346,6 +365,14 @@
return _copyXSLT(self)
def __call__(self, _input, *, profile_run=False, **_kw):
+ """__call__(self, _input, profile_run=False, **_kw)
+
+ Execute the XSL transformation on a tree or Element.
+
+ Pass the ``profile_run`` option to get profile information
+ about the XSLT. The result of the XSLT will have a property
+ xslt_profile that holds an XML tree with profiling data.
+ """
cdef _XSLTContext context
cdef _XSLTResolverContext resolver_context
cdef _Document input_doc
From scoder at codespeak.net Wed Feb 13 21:48:44 2008
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Wed, 13 Feb 2008 21:48:44 +0100 (CET)
Subject: [Lxml-checkins] r51453 - in lxml/trunk: . src/lxml
Message-ID: <20080213204844.7A2EB168406@codespeak.net>
Author: scoder
Date: Wed Feb 13 21:48:43 2008
New Revision: 51453
Modified:
lxml/trunk/ (props changed)
lxml/trunk/src/lxml/lxml.pyclasslookup.pyx
Log:
r3464 at delle: sbehnel | 2008-02-13 16:30:01 +0100
one more signature
Modified: lxml/trunk/src/lxml/lxml.pyclasslookup.pyx
==============================================================================
--- lxml/trunk/src/lxml/lxml.pyclasslookup.pyx (original)
+++ lxml/trunk/src/lxml/lxml.pyclasslookup.pyx Wed Feb 13 21:48:43 2008
@@ -238,7 +238,8 @@
cdef class PythonElementClassLookup(FallbackElementClassLookup):
- """Element class lookup based on a subclass method.
+ """PythonElementClassLookup(self, fallback=None)
+ Element class lookup based on a subclass method.
To use it, inherit from this class and override the lookup method to
lookup the element class for a node::
From scoder at codespeak.net Wed Feb 13 21:48:48 2008
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Wed, 13 Feb 2008 21:48:48 +0100 (CET)
Subject: [Lxml-checkins] r51454 - in lxml/trunk: . src/lxml src/lxml/html
Message-ID: <20080213204848.8D998168407@codespeak.net>
Author: scoder
Date: Wed Feb 13 21:48:48 2008
New Revision: 51454
Removed:
lxml/trunk/src/lxml/htmlbuilder.py
Modified:
lxml/trunk/ (props changed)
lxml/trunk/CHANGES.txt
lxml/trunk/src/lxml/html/builder.py
Log:
r3465 at delle: sbehnel | 2008-02-13 17:21:12 +0100
removed redundant module lxml.htmlbuilder (duplicate of lxml.html.builder)
Modified: lxml/trunk/CHANGES.txt
==============================================================================
--- lxml/trunk/CHANGES.txt (original)
+++ lxml/trunk/CHANGES.txt Wed Feb 13 21:48:48 2008
@@ -14,6 +14,9 @@
Bugs fixed
----------
+* The module ``lxml.html.builder`` was duplicated as
+ ``lxml.htmlbuilder``
+
* Setting an element slice in objectify could insert slice-overlapping
elements at the wrong position.
Modified: lxml/trunk/src/lxml/html/builder.py
==============================================================================
--- lxml/trunk/src/lxml/html/builder.py (original)
+++ lxml/trunk/src/lxml/html/builder.py Wed Feb 13 21:48:48 2008
@@ -1,3 +1,31 @@
+# --------------------------------------------------------------------
+# The ElementTree toolkit is
+#
+# Copyright (c) 1999-2004 by Fredrik Lundh
+#
+# By obtaining, using, and/or copying this software and/or its
+# associated documentation, you agree that you have read, understood,
+# and will comply with the following terms and conditions:
+#
+# Permission to use, copy, modify, and distribute this software and
+# its associated documentation for any purpose and without fee is
+# hereby granted, provided that the above copyright notice appears in
+# all copies, and that both that copyright notice and this permission
+# notice appear in supporting documentation, and that the name of
+# Secret Labs AB or the author not be used in advertising or publicity
+# pertaining to distribution of the software without specific, written
+# prior permission.
+#
+# SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD
+# TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT-
+# ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR
+# BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY
+# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
+# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
+# OF THIS SOFTWARE.
+# --------------------------------------------------------------------
+
"""
HTML specialisation of ``builder.py`` by Fredrik Lundh
Deleted: /lxml/trunk/src/lxml/htmlbuilder.py
==============================================================================
--- /lxml/trunk/src/lxml/htmlbuilder.py Wed Feb 13 21:48:48 2008
+++ (empty file)
@@ -1,154 +0,0 @@
-#
-# HTML specialisation of ``builder.py`` by Fredrik Lundh
-#
-# --------------------------------------------------------------------
-# The ElementTree toolkit is
-#
-# Copyright (c) 1999-2004 by Fredrik Lundh
-#
-# By obtaining, using, and/or copying this software and/or its
-# associated documentation, you agree that you have read, understood,
-# and will comply with the following terms and conditions:
-#
-# Permission to use, copy, modify, and distribute this software and
-# its associated documentation for any purpose and without fee is
-# hereby granted, provided that the above copyright notice appears in
-# all copies, and that both that copyright notice and this permission
-# notice appear in supporting documentation, and that the name of
-# Secret Labs AB or the author not be used in advertising or publicity
-# pertaining to distribution of the software without specific, written
-# prior permission.
-#
-# SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD
-# TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT-
-# ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR
-# BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY
-# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
-# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
-# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
-# OF THIS SOFTWARE.
-# --------------------------------------------------------------------
-
-"""
-Usage::
-
- >>> from lxml.htmlbuilder import *
- >>> html = HTML(
- ... HEAD( TITLE("Hello World") ),
- ... BODY( CLASS("main"),
- ... H1("Hello World !")
- ... )
- ... )
-
- >>> import lxml.etree
- >>> print lxml.etree.tostring(html, pretty_print=True)
- <html>
- <head>
- <title>Hello World</title>
- </head>
- <body class="main">
- <h1>Hello World !</h1>
- </body>
- </html>
-
-"""
-
-from builder import E
-
-# elements
-A = E.a # anchor
-ABBR = E.abbr # abbreviated form (e.g., WWW, HTTP, etc.)
-ACRONYM = E.acronym #
-ADDRESS = E.address # information on author
-APPLET = E.applet # Java applet (DEPRECATED)
-AREA = E.area # client-side image map area
-B = E.b # bold text style
-BASE = E.base # document base URI
-BASEFONT = E.basefont # base font size (DEPRECATED)
-BDO = E.bdo # I18N BiDi over-ride
-BIG = E.big # large text style
-BLOCKQUOTE = E.blockquote # long quotation
-BODY = E.body # document body
-BR = E.br # forced line break
-BUTTON = E.button # push button
-CAPTION = E.caption # table caption
-CENTER = E.center # shorthand for DIV align=center (DEPRECATED)
-CITE = E.cite # citation
-CODE = E.code # computer code fragment
-COL = E.col # table column
-COLGROUP = E.colgroup # table column group
-DD = E.dd # definition description
-DEL = getattr(E, 'del') # deleted text
-DFN = E.dfn # instance definition
-DIR = E.dir # directory list (DEPRECATED)
-DIV = E.div # generic language/style container
-DL = E.dl # definition list
-DT = E.dt # definition term
-EM = E.em # emphasis
-FIELDSET = E.fieldset # form control group
-FONT = E.font # local change to font (DEPRECATED)
-FORM = E.form # interactive form
-FRAME = E.frame # subwindow
-FRAMESET = E.frameset # window subdivision
-H1 = E.h1 # heading
-H2 = E.h2 # heading
-H3 = E.h3 # heading
-H4 = E.h4 # heading
-H5 = E.h5 # heading
-H6 = E.h6 # heading
-HEAD = E.head # document head
-HR = E.hr # horizontal rule
-HTML = E.html # document root element
-I = E.i # italic text style
-IFRAME = E.iframe # inline subwindow
-IMG = E.img # Embedded image
-INPUT = E.input # form control
-INS = E.ins # inserted text
-ISINDEX = E.isindex # single line prompt (DEPRECATED)
-KBD = E.kbd # text to be entered by the user
-LABEL = E.label # form field label text
-LEGEND = E.legend # fieldset legend
-LI = E.li # list item
-LINK = E.link # a media-independent link
-MAP = E.map # client-side image map
-MENU = E.menu # menu list (DEPRECATED)
-META = E.meta # generic metainformation
-NOFRAMES = E.noframes # alternate content container for non frame-based rendering
-NOSCRIPT = E.noscript # alternate content container for non script-based rendering
-OBJECT = E.object # generic embedded object
-OL = E.ol # ordered list
-OPTGROUP = E.optgroup # option group
-OPTION = E.option # selectable choice
-P = E.p # paragraph
-PARAM = E.param # named property value
-PRE = E.pre # preformatted text
-Q = E.q # short inline quotation
-S = E.s # strike-through text style (DEPRECATED)
-SAMP = E.samp # sample program output, scripts, etc.
-SCRIPT = E.script # script statements
-SELECT = E.select # option selector
-SMALL = E.small # small text style
-SPAN = E.span # generic language/style container
-STRIKE = E.strike # strike-through text (DEPRECATED)
-STRONG = E.strong # strong emphasis
-STYLE = E.style # style info
-SUB = E.sub # subscript
-SUP = E.sup # superscript
-TABLE = E.table #
-TBODY = E.tbody # table body
-TD = E.td # table data cell
-TEXTAREA = E.textarea # multi-line text field
-TFOOT = E.tfoot # table footer
-TH = E.th # table header cell
-THEAD = E.thead # table header
-TITLE = E.title # document title
-TR = E.tr # table row
-TT = E.tt # teletype or monospaced text style
-U = E.u # underlined text style (DEPRECATED)
-UL = E.ul # unordered list
-VAR = E.var # instance of a variable or program argument
-
-# attributes (only reserved words are included here)
-ATTR = dict
-def CLASS(v): return {'class': v}
-def FOR(v): return {'for': v}
From scoder at codespeak.net Wed Feb 13 21:48:53 2008
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Wed, 13 Feb 2008 21:48:53 +0100 (CET)
Subject: [Lxml-checkins] r51455 - in lxml/trunk: . src/lxml
Message-ID: <20080213204853.1092816840A@codespeak.net>
Author: scoder
Date: Wed Feb 13 21:48:52 2008
New Revision: 51455
Modified:
lxml/trunk/ (props changed)
lxml/trunk/src/lxml/parser.pxi
lxml/trunk/src/lxml/xslt.pxi
Log:
r3466 at delle: sbehnel | 2008-02-13 17:22:23 +0100
rst doc fixes
Modified: lxml/trunk/src/lxml/parser.pxi
==============================================================================
--- lxml/trunk/src/lxml/parser.pxi (original)
+++ lxml/trunk/src/lxml/parser.pxi Wed Feb 13 21:48:52 2008
@@ -1473,22 +1473,24 @@
attribute default values are requested.
Available boolean keyword arguments:
- - attribute_defaults - read default attributes from DTD
- - dtd_validation - validate (if DTD is available)
- - load_dtd - use DTD for parsing
- - no_network - prevent network access for related files (default: True)
- - ns_clean - clean up redundant namespace declarations
- - recover - try hard to parse through broken XML
- - remove_blank_text - discard blank text nodes
- - remove_comments - discard comments
- - remove_pis - discard processing instructions
- - compact - save memory for short text content (default: True)
- - resolve_entities - replace entities by their text value (default: True)
+
+ - attribute_defaults - read default attributes from DTD
+ - dtd_validation - validate (if DTD is available)
+ - load_dtd - use DTD for parsing
+ - no_network - prevent network access for related files (default: True)
+ - ns_clean - clean up redundant namespace declarations
+ - recover - try hard to parse through broken XML
+ - remove_blank_text - discard blank text nodes
+ - remove_comments - discard comments
+ - remove_pis - discard processing instructions
+ - compact - save memory for short text content (default: True)
+ - resolve_entities - replace entities by their text value (default: True)
Other keyword arguments:
- - encoding - override the document encoding
- - target - a parser target object that will receive the parse events
- - schema - an XMLSchema to validate against
+
+ - encoding - override the document encoding
+ - target - a parser target object that will receive the parse events
+ - schema - an XMLSchema to validate against
Note that you should avoid sharing parsers between threads. While this is
not harmful, it is more efficient to use separate parsers. This does not
@@ -1612,17 +1614,19 @@
this off.
Available boolean keyword arguments:
- - recover - try hard to parse through broken HTML (default: True)
- - no_network - prevent network access for related files (default: True)
- - remove_blank_text - discard empty text nodes
- - remove_comments - discard comments
- - remove_pis - discard processing instructions
- - compact - save memory for short text content (default: True)
+
+ - recover - try hard to parse through broken HTML (default: True)
+ - no_network - prevent network access for related files (default: True)
+ - remove_blank_text - discard empty text nodes
+ - remove_comments - discard comments
+ - remove_pis - discard processing instructions
+ - compact - save memory for short text content (default: True)
Other keyword arguments:
- - encoding - override the document encoding
- - target - a parser target object that will receive the parse events
- - schema - an XMLSchema to validate against
+
+ - encoding - override the document encoding
+ - target - a parser target object that will receive the parse events
+ - schema - an XMLSchema to validate against
Note that you should avoid sharing parsers between threads for performance
reasons.
Modified: lxml/trunk/src/lxml/xslt.pxi
==============================================================================
--- lxml/trunk/src/lxml/xslt.pxi (original)
+++ lxml/trunk/src/lxml/xslt.pxi Wed Feb 13 21:48:52 2008
@@ -174,11 +174,13 @@
passing any of the following boolean keyword arguments. All of
them default to True to allow access.
- - read_file
- - write_file
- - create_dir
- - read_network
- - write_network
+ - read_file
+ - write_file
+ - create_dir
+ - read_network
+ - write_network
+
+ See `XSLT`.
"""
cdef xslt.xsltSecurityPrefs* _prefs
def __init__(self, *, read_file=True, write_file=True, create_dir=True,
@@ -264,15 +266,18 @@
>>> result = transform(xml_tree)
Keyword arguments of the constructor:
- - regexp: enable exslt regular expression support in XPath
- (default: True)
- - access_control: access restrictions for network or file
- system (see `XSLTAccessControl`)
+
+ - regexp: enable exslt regular expression support in XPath
+ (default: True)
+ - access_control: access restrictions for network or file
+ system (see `XSLTAccessControl`)
Keyword arguments of the XSLT call:
- - profile_run: enable XSLT profiling (default: False)
- Other keyword arguments of the call are passed to the stylesheet.
+ - profile_run: enable XSLT profiling (default: False)
+
+ Other keyword arguments of the call are passed to the stylesheet
+ as parameters.
"""
cdef _XSLTContext _context
cdef xslt.xsltStylesheet* _c_style
From scoder at codespeak.net Wed Feb 13 21:48:57 2008
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Wed, 13 Feb 2008 21:48:57 +0100 (CET)
Subject: [Lxml-checkins] r51456 - in lxml/trunk: . src/lxml/html
Message-ID: <20080213204857.25C27168406@codespeak.net>
Author: scoder
Date: Wed Feb 13 21:48:56 2008
New Revision: 51456
Modified:
lxml/trunk/ (props changed)
lxml/trunk/src/lxml/html/builder.py
Log:
r3467 at delle: sbehnel | 2008-02-13 17:24:33 +0100
cleanup
Modified: lxml/trunk/src/lxml/html/builder.py
==============================================================================
--- lxml/trunk/src/lxml/html/builder.py (original)
+++ lxml/trunk/src/lxml/html/builder.py Wed Feb 13 21:48:56 2008
@@ -1,29 +1,6 @@
# --------------------------------------------------------------------
# The ElementTree toolkit is
-#
# Copyright (c) 1999-2004 by Fredrik Lundh
-#
-# By obtaining, using, and/or copying this software and/or its
-# associated documentation, you agree that you have read, understood,
-# and will comply with the following terms and conditions:
-#
-# Permission to use, copy, modify, and distribute this software and
-# its associated documentation for any purpose and without fee is
-# hereby granted, provided that the above copyright notice appears in
-# all copies, and that both that copyright notice and this permission
-# notice appear in supporting documentation, and that the name of
-# Secret Labs AB or the author not be used in advertising or publicity
-# pertaining to distribution of the software without specific, written
-# prior permission.
-#
-# SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD
-# TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT-
-# ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR
-# BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY
-# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
-# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
-# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
-# OF THIS SOFTWARE.
# --------------------------------------------------------------------
"""
From scoder at codespeak.net Wed Feb 13 21:49:02 2008
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Wed, 13 Feb 2008 21:49:02 +0100 (CET)
Subject: [Lxml-checkins] r51457 - in lxml/trunk: . src/lxml src/lxml/tests
Message-ID: <20080213204902.07C7E16840B@codespeak.net>
Author: scoder
Date: Wed Feb 13 21:49:01 2008
New Revision: 51457
Modified:
lxml/trunk/ (props changed)
lxml/trunk/CHANGES.txt
lxml/trunk/src/lxml/lxml.pyclasslookup.pyx
lxml/trunk/src/lxml/tests/test_pyclasslookup.py
Log:
r3468 at delle: sbehnel | 2008-02-13 19:52:53 +0100
child iteration in lxml.pyclasslookup
Modified: lxml/trunk/CHANGES.txt
==============================================================================
--- lxml/trunk/CHANGES.txt (original)
+++ lxml/trunk/CHANGES.txt Wed Feb 13 21:49:01 2008
@@ -8,6 +8,8 @@
Features added
--------------
+* Child iteration in ``lxml.pyclasslookup``.
+
* Docstrings now reflect the signature of functions and methods to
make them visible in API docs and ``help()``
Modified: lxml/trunk/src/lxml/lxml.pyclasslookup.pyx
==============================================================================
--- lxml/trunk/src/lxml/lxml.pyclasslookup.pyx (original)
+++ lxml/trunk/src/lxml/lxml.pyclasslookup.pyx Wed Feb 13 21:49:01 2008
@@ -1,3 +1,29 @@
+"""
+A whole-tree Element class lookup scheme for `lxml.etree`.
+
+This class lookup scheme allows access to the entire XML tree. To use
+it, let a class inherit from `PythonElementClassLookup` and
+re-implement the ``lookup(self, doc, root)`` method:
+
+ >>> from lxml import etree, pyclasslookup
+ >>>
+ >>> class MyElementClass(etree.ElementBase):
+ ... honkey = True
+ ...
+ >>> class MyLookup(pyclasslookup.PythonElementClassLookup):
+ ... def lookup(self, doc, root):
+ ... if root.tag == "sometag":
+ ... return MyElementClass
+ ... else:
+ ... for child in root:
+ ... if child.tag == "someothertag":
+ ... return MyElementClass
+ ... # delegate to default
+ ... return None
+
+See http://codespeak.net/lxml/element_classes.html
+"""
+
from etreepublic cimport _Document, _Element, ElementBase
from etreepublic cimport ElementClassLookup, FallbackElementClassLookup
from etreepublic cimport elementFactory, import_lxml__etree
@@ -128,6 +154,9 @@
c_node = cetree.findChildBackwards(self._c_node, 0)
return c_node != NULL
+ def __iter__(self):
+ return iter(self.getchildren())
+
def get(self, key, default=None):
"""Gets an element attribute.
"""
@@ -258,6 +287,10 @@
self._lookup_function = _lookup_class
def lookup(self, doc, element):
+ """lookup(self, doc, element)
+
+ Override this method to implement your own lookup scheme.
+ """
return None
cdef object _lookup_class(state, _Document doc, tree.xmlNode* c_node):
Modified: lxml/trunk/src/lxml/tests/test_pyclasslookup.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_pyclasslookup.py (original)
+++ lxml/trunk/src/lxml/tests/test_pyclasslookup.py Wed Feb 13 21:49:01 2008
@@ -245,6 +245,20 @@
self.assertEquals([ c.tag for c in root.getchildren() ],
child_tags)
+ def test_lookup_iter_children(self):
+ el_class = self._buildElementClass()
+ el_class.CHILD_TAGS = None
+ def lookup(doc, el):
+ if el_class.CHILD_TAGS is None:
+ el_class.CHILD_TAGS = [ c.tag for c in el ]
+ return el_class
+ self._setClassLookup(lookup)
+ root = self.XML(xml_str)
+ child_tags = root.CHILD_TAGS
+ self.assertNotEquals(None, child_tags)
+ self.assertEquals([ c.tag for c in root.getchildren() ],
+ child_tags)
+
def test_lookup_getparent(self):
el_class = self._buildElementClass()
el_class.PARENT = None
From scoder at codespeak.net Wed Feb 13 21:49:05 2008
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Wed, 13 Feb 2008 21:49:05 +0100 (CET)
Subject: [Lxml-checkins] r51458 - lxml/trunk
Message-ID: <20080213204905.6C68D16840B@codespeak.net>
Author: scoder
Date: Wed Feb 13 21:49:05 2008
New Revision: 51458
Modified:
lxml/trunk/ (props changed)
lxml/trunk/Makefile
Log:
r3469 at delle: sbehnel | 2008-02-13 19:54:11 +0100
removed private stuff from generated API documentation
Modified: lxml/trunk/Makefile
==============================================================================
--- lxml/trunk/Makefile (original)
+++ lxml/trunk/Makefile Wed Feb 13 21:49:05 2008
@@ -42,10 +42,11 @@
rm -fr doc/html/api
@[ -x "`which epydoc`" ] \
&& (cd src && echo "Generating API docs ..." && \
- PYTHONPATH=. epydoc -v --docformat "restructuredtext en" -o ../doc/html/api --name lxml --url http://codespeak.net/lxml/ lxml/) \
+ PYTHONPATH=. epydoc -v --docformat "restructuredtext en" \
+ -o ../doc/html/api --no-private --exclude='[.]html[.]tests|[.]_' \
+ --name lxml --url http://codespeak.net/lxml/ lxml/) \
|| (echo "not generating epydoc API documentation")
-# XXX What should the default be?
test: test_inplace
valtest: valgrind_test_inplace
From scoder at codespeak.net Wed Feb 13 21:49:13 2008
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Wed, 13 Feb 2008 21:49:13 +0100 (CET)
Subject: [Lxml-checkins] r51459 - in lxml/trunk: . src/lxml src/lxml/html
src/lxml/tests
Message-ID: <20080213204913.46859168407@codespeak.net>
Author: scoder
Date: Wed Feb 13 21:49:12 2008
New Revision: 51459
Modified:
lxml/trunk/ (props changed)
lxml/trunk/src/lxml/ElementInclude.py
lxml/trunk/src/lxml/builder.py
lxml/trunk/src/lxml/cssselect.py
lxml/trunk/src/lxml/doctestcompare.py
lxml/trunk/src/lxml/html/__init__.py
lxml/trunk/src/lxml/html/builder.py
lxml/trunk/src/lxml/html/clean.py
lxml/trunk/src/lxml/lxml.etree.pyx
lxml/trunk/src/lxml/lxml.objectify.pyx
lxml/trunk/src/lxml/sax.py
lxml/trunk/src/lxml/tests/__init__.py
Log:
r3470 at delle: sbehnel | 2008-02-13 20:12:19 +0100
loads of docstrings
Modified: lxml/trunk/src/lxml/ElementInclude.py
==============================================================================
--- lxml/trunk/src/lxml/ElementInclude.py (original)
+++ lxml/trunk/src/lxml/ElementInclude.py Wed Feb 13 21:49:12 2008
@@ -41,9 +41,14 @@
# OF THIS SOFTWARE.
# --------------------------------------------------------------------
-##
-# Limited XInclude support for the ElementTree package.
-##
+"""
+Limited XInclude support for the ElementTree package.
+
+While lxml.etree has full support for XInclude (see
+`etree.ElementTree.xinclude()`), this module provides a simpler, pure
+Python, ElementTree compatible implementation that supports a simple
+form of custom URL resolvers.
+"""
import copy, etree
from urlparse import urljoin
Modified: lxml/trunk/src/lxml/builder.py
==============================================================================
--- lxml/trunk/src/lxml/builder.py (original)
+++ lxml/trunk/src/lxml/builder.py Wed Feb 13 21:49:12 2008
@@ -33,6 +33,10 @@
# OF THIS SOFTWARE.
# --------------------------------------------------------------------
+"""
+The ``E`` Element factory for generating XML documents.
+"""
+
import etree as ET
try:
Modified: lxml/trunk/src/lxml/cssselect.py
==============================================================================
--- lxml/trunk/src/lxml/cssselect.py (original)
+++ lxml/trunk/src/lxml/cssselect.py Wed Feb 13 21:49:12 2008
@@ -1,3 +1,9 @@
+"""CSS Selectors based on XPath.
+
+This module supports selecting XML/HTML tags based on CSS selectors.
+See the `CSSSelector` class for details.
+"""
+
import re
from lxml import etree
@@ -11,7 +17,17 @@
pass
class CSSSelector(etree.XPath):
+ """A CSS selector.
+
+ Usage::
+ >>> from lxml import etree, cssselect
+ >>> select = cssselect.CSSSelector("a tag > child")
+
+ >>> root = etree.XML("<a><b><c/><tag><child>TEXT</child></tag></b></a>")
+ >>> [ el.tag for el in select(root) ]
+ ['child']
+ """
def __init__(self, css):
path = css_to_xpath(css)
etree.XPath.__init__(self, path)
@@ -575,9 +591,8 @@
self.condition = other.condition
class XPathExprOr(XPathExpr):
-
"""
- Represents on |'d expressions. Note that unfortunately it isn't
+ Represents |'d expressions. Note that unfortunately it isn't
the union, it's the sum, so duplicate elements will appear.
"""
Modified: lxml/trunk/src/lxml/doctestcompare.py
==============================================================================
--- lxml/trunk/src/lxml/doctestcompare.py (original)
+++ lxml/trunk/src/lxml/doctestcompare.py Wed Feb 13 21:49:12 2008
@@ -1,8 +1,16 @@
"""
lxml-based doctest output comparison.
-To use this you must call ``lxmldoctest.install()``, which will cause
-doctest to use this in all subsequent calls.
+Note: normally, you should just import the `lxml.usedoctest` and
+`lxml.html.usedoctest` modules from within a doctest, instead of this
+one::
+
+ >>> import lxml.usedoctest # for XML output
+
+ >>> import lxml.html.usedoctest # for HTML output
+
+To use this module directly, you must call ``lxmldoctest.install()``,
+which will cause doctest to use this in all subsequent calls.
This changes the way output is checked and comparisons are made for
XML or HTML-like content.
Modified: lxml/trunk/src/lxml/html/__init__.py
==============================================================================
--- lxml/trunk/src/lxml/html/__init__.py (original)
+++ lxml/trunk/src/lxml/html/__init__.py Wed Feb 13 21:49:12 2008
@@ -1,3 +1,6 @@
+"""The ``lxml.html`` tool set for HTML handling.
+"""
+
import threading
import re
import urlparse
Modified: lxml/trunk/src/lxml/html/builder.py
==============================================================================
--- lxml/trunk/src/lxml/html/builder.py (original)
+++ lxml/trunk/src/lxml/html/builder.py Wed Feb 13 21:49:12 2008
@@ -4,7 +4,7 @@
# --------------------------------------------------------------------
"""
-HTML specialisation of ``builder.py`` by Fredrik Lundh
+A set of HTML generator tags for building HTML documents.
Usage::
Modified: lxml/trunk/src/lxml/html/clean.py
==============================================================================
--- lxml/trunk/src/lxml/html/clean.py (original)
+++ lxml/trunk/src/lxml/html/clean.py Wed Feb 13 21:49:12 2008
@@ -1,3 +1,9 @@
+"""A cleanup tool for HTML.
+
+Removes unwanted tags and content. See the `Cleaner` class for
+details.
+"""
+
import re
import copy
import urlparse
Modified: lxml/trunk/src/lxml/lxml.etree.pyx
==============================================================================
--- lxml/trunk/src/lxml/lxml.etree.pyx (original)
+++ lxml/trunk/src/lxml/lxml.etree.pyx Wed Feb 13 21:49:12 2008
@@ -1,5 +1,5 @@
-"""The lxml.etree module implements the extended ElementTree API for
-XML.
+"""The ``lxml.etree`` module implements the extended ElementTree API
+for XML.
"""
__docformat__ = "restructuredtext en"
Modified: lxml/trunk/src/lxml/lxml.objectify.pyx
==============================================================================
--- lxml/trunk/src/lxml/lxml.objectify.pyx (original)
+++ lxml/trunk/src/lxml/lxml.objectify.pyx Wed Feb 13 21:49:12 2008
@@ -1,3 +1,7 @@
+"""The ``lxml.objectify`` module implements a Python object API for
+XML. It is based on `lxml.etree`.
+"""
+
from etreepublic cimport _Document, _Element, ElementBase
from etreepublic cimport _ElementIterator, ElementClassLookup
from etreepublic cimport elementFactory, import_lxml__etree, textOf
Modified: lxml/trunk/src/lxml/sax.py
==============================================================================
--- lxml/trunk/src/lxml/sax.py (original)
+++ lxml/trunk/src/lxml/sax.py Wed Feb 13 21:49:12 2008
@@ -1,3 +1,15 @@
+"""
+SAX-based adapter to copy trees from/to the Python standard library.
+
+Use the `ElementTreeContentHandler` class to build an ElementTree from
+SAX events.
+
+Use the `ElementTreeProducer` class or the `saxify()` function to fire
+the SAX events of an ElementTree against a SAX ContentHandler.
+
+See http://codespeak.net/lxml/sax.html
+"""
+
from xml.sax.handler import ContentHandler
import etree
from etree import ElementTree, SubElement
@@ -220,4 +232,7 @@
return prefix + ':' + local_name
def saxify(element_or_tree, content_handler):
+ """One-shot helper to generate SAX events from an XML tree and fire
+ them against a SAX ContentHandler.
+ """
return ElementTreeProducer(element_or_tree, content_handler).saxify()
Modified: lxml/trunk/src/lxml/tests/__init__.py
==============================================================================
--- lxml/trunk/src/lxml/tests/__init__.py (original)
+++ lxml/trunk/src/lxml/tests/__init__.py Wed Feb 13 21:49:12 2008
@@ -1,2 +1,4 @@
-# this is a package
+"""
+The lxml test suite for lxml, ElementTree and cElementTree.
+"""
From scoder at codespeak.net Wed Feb 13 21:49:16 2008
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Wed, 13 Feb 2008 21:49:16 +0100 (CET)
Subject: [Lxml-checkins] r51460 - lxml/trunk
Message-ID: <20080213204916.11E9416840D@codespeak.net>
Author: scoder
Date: Wed Feb 13 21:49:16 2008
New Revision: 51460
Modified:
lxml/trunk/ (props changed)
lxml/trunk/CHANGES.txt
Log:
r3471 at delle: sbehnel | 2008-02-13 20:29:49 +0100
changelog
Modified: lxml/trunk/CHANGES.txt
==============================================================================
--- lxml/trunk/CHANGES.txt (original)
+++ lxml/trunk/CHANGES.txt Wed Feb 13 21:49:16 2008
@@ -10,8 +10,8 @@
* Child iteration in ``lxml.pyclasslookup``.
-* Docstrings now reflect the signature of functions and methods to
- make them visible in API docs and ``help()``
+* Loads of new docstrings reflect the signature of functions and
+ methods to make them visible in API docs and ``help()``
Bugs fixed
----------
From scoder at codespeak.net Wed Feb 13 21:49:22 2008
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Wed, 13 Feb 2008 21:49:22 +0100 (CET)
Subject: [Lxml-checkins] r51461 - in lxml/trunk: . src/lxml/html
Message-ID: <20080213204922.2B339168406@codespeak.net>
Author: scoder
Date: Wed Feb 13 21:49:21 2008
New Revision: 51461
Added:
lxml/trunk/src/lxml/html/_setmixin.py
- copied unchanged from r50752, lxml/trunk/src/lxml/html/setmixin.py
Removed:
lxml/trunk/src/lxml/html/setmixin.py
Modified:
lxml/trunk/ (props changed)
lxml/trunk/CHANGES.txt
lxml/trunk/src/lxml/html/__init__.py
Log:
r3472 at delle: sbehnel | 2008-02-13 20:31:51 +0100
renamed lxml.html.setmixin to _setmixin to make clear it's not a real part of lxml
Modified: lxml/trunk/CHANGES.txt
==============================================================================
--- lxml/trunk/CHANGES.txt (original)
+++ lxml/trunk/CHANGES.txt Wed Feb 13 21:49:21 2008
@@ -25,6 +25,11 @@
Other changes
-------------
+* The previously public module ``lxml.html.setmixin`` was renamed to
+ ``lxml.html._setmixin`` as it is not an official part of lxml. If
+ you want to use it, feel free to copy it over to your own source
+ base.
+
* Passing ``--with-xslt-config=/path/to/xslt-config`` to setup.py will
override the ``xslt-config`` script that is used to determine the C
compiler options.
Modified: lxml/trunk/src/lxml/html/__init__.py
==============================================================================
--- lxml/trunk/src/lxml/html/__init__.py (original)
+++ lxml/trunk/src/lxml/html/__init__.py Wed Feb 13 21:49:21 2008
@@ -8,7 +8,7 @@
from lxml import etree
from lxml.html import defs
from lxml import cssselect
-from lxml.html.setmixin import SetMixin
+from lxml.html._setmixin import SetMixin
try:
from UserDict import DictMixin
except ImportError:
Deleted: /lxml/trunk/src/lxml/html/setmixin.py
==============================================================================
--- /lxml/trunk/src/lxml/html/setmixin.py Wed Feb 13 21:49:21 2008
+++ (empty file)
@@ -1,115 +0,0 @@
-class SetMixin(object):
-
- """
- Mix-in for sets. You must define __iter__, add, remove
- """
-
- def __len__(self):
- length = 0
- for item in self:
- length += 1
- return length
-
- def __contains__(self, item):
- for has_item in self:
- if item == has_item:
- return True
- return False
-
- def issubset(self, other):
- for item in other:
- if item not in self:
- return False
- return True
-
- __le__ = issubset
-
- def issuperset(self, other):
- for item in self:
- if item not in other:
- return False
- return True
-
- __ge__ = issuperset
-
- def union(self, other):
- return self | other
-
- def __or__(self, other):
- new = self.copy()
- new |= other
- return new
-
- def intersection(self, other):
- return self & other
-
- def __and__(self, other):
- new = self.copy()
- new &= other
- return new
-
- def difference(self, other):
- return self - other
-
- def __sub__(self, other):
- new = self.copy()
- new -= other
- return new
-
- def symmetric_difference(self, other):
- return self ^ other
-
- def __xor__(self, other):
- new = self.copy()
- new ^= other
- return new
-
- def copy(self):
- return set(self)
-
- def update(self, other):
- for item in other:
- self.add(item)
-
- def __ior__(self, other):
- self.update(other)
- return self
-
- def intersection_update(self, other):
- for item in self:
- if item not in other:
- self.remove(item)
-
- def __iand__(self, other):
- self.intersection_update(other)
- return self
-
- def difference_update(self, other):
- for item in other:
- if item in self:
- self.remove(item)
-
- def __isub__(self, other):
- self.difference_update(other)
- return self
-
- def symmetric_difference_update(self, other):
- for item in other:
- if item in self:
- self.remove(item)
- else:
- self.add(item)
-
- def __ixor__(self, other):
- self.symmetric_difference_update(other)
- return self
-
- def discard(self, item):
- try:
- self.remove(item)
- except KeyError:
- pass
-
- def clear(self):
- for item in list(self):
- self.remove(item)
From scoder at codespeak.net Wed Feb 13 21:49:39 2008
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Wed, 13 Feb 2008 21:49:39 +0100 (CET)
Subject: [Lxml-checkins] r51462 - lxml/trunk
Message-ID: <20080213204939.5CE8116840A@codespeak.net>
Author: scoder
Date: Wed Feb 13 21:49:38 2008
New Revision: 51462
Modified:
lxml/trunk/ (props changed)
lxml/trunk/CHANGES.txt
Log:
r3484 at delle: sbehnel | 2008-02-13 21:48:51 +0100
changelog fix
Modified: lxml/trunk/CHANGES.txt
==============================================================================
--- lxml/trunk/CHANGES.txt (original)
+++ lxml/trunk/CHANGES.txt Wed Feb 13 21:49:38 2008
@@ -19,6 +19,10 @@
* The module ``lxml.html.builder`` was duplicated as
``lxml.htmlbuilder``
+* Form elements would return None for ``form.fields.keys()`` if there
+ was an unnamed input field. Now unnamed input fields are completely
+ ignored.
+
* Setting an element slice in objectify could insert slice-overlapping
elements at the wrong position.
From scoder at codespeak.net Wed Feb 13 22:31:38 2008
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Wed, 13 Feb 2008 22:31:38 +0100 (CET)
Subject: [Lxml-checkins] r51465 - in lxml/trunk: . doc
Message-ID: <20080213213138.B601A168411@codespeak.net>
Author: scoder
Date: Wed Feb 13 22:31:37 2008
New Revision: 51465
Modified:
lxml/trunk/ (props changed)
lxml/trunk/CHANGES.txt
lxml/trunk/doc/main.txt
Log:
r3489 at delle: sbehnel | 2008-02-13 22:30:41 +0100
release date of 2.0.1
Modified: lxml/trunk/CHANGES.txt
==============================================================================
--- lxml/trunk/CHANGES.txt (original)
+++ lxml/trunk/CHANGES.txt Wed Feb 13 22:31:37 2008
@@ -2,8 +2,8 @@
lxml changelog
==============
-2.0.1 (Under development)
-=========================
+2.0.1 (2008-02-13)
+==================
Features added
--------------
@@ -57,10 +57,6 @@
Bugs fixed
----------
-* Form elements would return None for ``form.fields.keys()`` if there
- was an unnamed input field. Now unnamed input fields are completely
- ignored.
-
Other changes
-------------
Modified: lxml/trunk/doc/main.txt
==============================================================================
--- lxml/trunk/doc/main.txt (original)
+++ lxml/trunk/doc/main.txt Wed Feb 13 22:31:37 2008
@@ -145,7 +145,7 @@
.. _`lxml at the Python Package Index`: http://pypi.python.org/pypi/lxml/
.. _`this key`: pubkey.asc
-The latest version is `lxml 2.0.1`_, released 2008-02-08
+The latest version is `lxml 2.0.1`_, released 2008-02-13
(`changes for 2.0.1`_). `Older versions`_ are listed below.
.. _`Older versions`: #old-versions
From scoder at codespeak.net Wed Feb 13 22:35:36 2008
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Wed, 13 Feb 2008 22:35:36 +0100 (CET)
Subject: [Lxml-checkins] r51466 - lxml/trunk
Message-ID: <20080213213536.B1A12168411@codespeak.net>
Author: scoder
Date: Wed Feb 13 22:35:34 2008
New Revision: 51466
Modified:
lxml/trunk/ (props changed)
lxml/trunk/CHANGES.txt
Log:
r3493 at delle: sbehnel | 2008-02-13 22:34:59 +0100
changelog
Modified: lxml/trunk/CHANGES.txt
==============================================================================
--- lxml/trunk/CHANGES.txt (original)
+++ lxml/trunk/CHANGES.txt Wed Feb 13 22:35:34 2008
@@ -29,6 +29,9 @@
Other changes
-------------
+* The generated API documentation was cleaned up and disburdened from
+ non-public classes etc.
+
* The previously public module ``lxml.html.setmixin`` was renamed to
``lxml.html._setmixin`` as it is not an official part of lxml. If
you want to use it, feel free to copy it over to your own source
From scoder at codespeak.net Wed Feb 13 23:02:17 2008
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Wed, 13 Feb 2008 23:02:17 +0100 (CET)
Subject: [Lxml-checkins] r51467 - lxml/tag/lxml-2.0.1
Message-ID: <20080213220217.1BB4E168406@codespeak.net>
Author: scoder
Date: Wed Feb 13 23:02:17 2008
New Revision: 51467
Added:
lxml/tag/lxml-2.0.1/
- copied from r51466, lxml/trunk/
Log:
tag for lxml 2.0.1
From scoder at codespeak.net Thu Feb 14 09:19:44 2008
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Thu, 14 Feb 2008 09:19:44 +0100 (CET)
Subject: [Lxml-checkins] r51473 - in lxml/trunk: . src/lxml src/lxml/tests
Message-ID: <20080214081944.1629E168411@codespeak.net>
Author: scoder
Date: Thu Feb 14 09:19:43 2008
New Revision: 51473
Modified:
lxml/trunk/ (props changed)
lxml/trunk/src/lxml/lxml.pyclasslookup.pyx
lxml/trunk/src/lxml/tests/test_pyclasslookup.py
Log:
r3497 at delle: sbehnel | 2008-02-14 09:17:23 +0100
iterchildren() method in lxml.pyclasslookup, faster proxy instantiation
Modified: lxml/trunk/src/lxml/lxml.pyclasslookup.pyx
==============================================================================
--- lxml/trunk/src/lxml/lxml.pyclasslookup.pyx (original)
+++ lxml/trunk/src/lxml/lxml.pyclasslookup.pyx Thu Feb 14 09:19:43 2008
@@ -1,9 +1,10 @@
"""
A whole-tree Element class lookup scheme for `lxml.etree`.
-This class lookup scheme allows access to the entire XML tree. To use
-it, let a class inherit from `PythonElementClassLookup` and
-re-implement the ``lookup(self, doc, root)`` method:
+This class lookup scheme allows access to the entire XML tree in
+read-only mode. To use it, let a class inherit from
+`PythonElementClassLookup` and re-implement the ``lookup(self, doc,
+root)`` method:
>>> from lxml import etree, pyclasslookup
>>>
@@ -21,6 +22,15 @@
... # delegate to default
... return None
+Note that the API of the Element objects is not complete. It is
+purely read-only and does not support all features of the normal
+`lxml.etree` API (such as XPath, extended slicing or some iteration
+methods).
+
+Also, you cannot wrap such a read-only Element in an ElementTree, and
+you must take care not to keep a reference to them outside of the
+`lookup()` method.
+
See http://codespeak.net/lxml/element_classes.html
"""
@@ -43,6 +53,7 @@
__version__ = etree.__version__
cdef class _ElementProxy:
+ "The main read-only Element proxy class (for internal use only!)."
cdef tree.xmlNode* _c_node
cdef object _source_proxy
cdef object _dependent_proxies
@@ -157,6 +168,18 @@
def __iter__(self):
return iter(self.getchildren())
+ def iterchildren(self, tag=None, *, reversed=False):
+ """iterchildren(self, tag=None, reversed=False)
+
+ Iterate over the children of this element.
+ """
+ children = self.getchildren()
+ if tag is not None:
+ children = [ el for el in children if el.tag == tag ]
+ if reversed:
+ children = children[::-1]
+ return iter(children)
+
def get(self, key, default=None):
"""Gets an element attribute.
"""
@@ -230,15 +253,21 @@
return _newProxy(self._source_proxy, c_node)
return None
+
+cdef extern from "etree_defs.h":
+ # macro call to 't->tp_new()' for fast instantiation
+ cdef _ElementProxy NEW_PROXY "PY_NEW" (object t)
+
cdef _ElementProxy _newProxy(_ElementProxy sourceProxy, tree.xmlNode* c_node):
cdef _ElementProxy el
- el = _ElementProxy()
+ el = NEW_PROXY(_ElementProxy)
el._c_node = c_node
if sourceProxy is None:
- sourceProxy = el
- el._dependent_proxies = []
- el._source_proxy = sourceProxy
- python.PyList_Append(sourceProxy._dependent_proxies, el)
+ el._source_proxy = el
+ el._dependent_proxies = [el]
+ else:
+ el._source_proxy = sourceProxy
+ python.PyList_Append(sourceProxy._dependent_proxies, el)
return el
cdef _freeProxies(_ElementProxy sourceProxy):
Modified: lxml/trunk/src/lxml/tests/test_pyclasslookup.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_pyclasslookup.py (original)
+++ lxml/trunk/src/lxml/tests/test_pyclasslookup.py Thu Feb 14 09:19:43 2008
@@ -259,6 +259,43 @@
self.assertEquals([ c.tag for c in root.getchildren() ],
child_tags)
+ def test_lookup_iterchildren(self):
+ el_class = self._buildElementClass()
+ el_class.CHILD_TAGS = None
+ def lookup(doc, el):
+ if el_class.CHILD_TAGS is None:
+ el_class.CHILD_TAGS = [ c.tag for c in el.iterchildren() ]
+ return el_class
+ self._setClassLookup(lookup)
+ root = self.XML(xml_str)
+ child_tags = root.CHILD_TAGS
+ self.assertNotEquals(None, child_tags)
+ self.assertEquals([ c.tag for c in root.getchildren() ],
+ child_tags)
+
+ def test_lookup_iterchildren_tag(self):
+ el_class = self._buildElementClass()
+ el_class.CHILD_TAGS = None
+ def lookup(doc, el):
+ if not el_class.CHILD_TAGS:
+ el_class.CHILD_TAGS = [
+ c.tag for c in el.iterchildren(tag='{objectified}c2') ]
+ return el_class
+ self._setClassLookup(lookup)
+
+ root = self.XML(xml_str)
+ child_tags = root.CHILD_TAGS
+ self.assertNotEquals(None, child_tags)
+ self.assertEquals([], child_tags)
+
+ c1 = root[0]
+ child_tags = root.CHILD_TAGS
+ self.assertNotEquals(None, child_tags)
+ self.assertNotEquals([], child_tags)
+ self.assertEquals(
+ [ c.tag for c in root[0].iterchildren(tag='{objectified}c2') ],
+ child_tags)
+
def test_lookup_getparent(self):
el_class = self._buildElementClass()
el_class.PARENT = None
From scoder at codespeak.net Thu Feb 14 09:19:48 2008
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Thu, 14 Feb 2008 09:19:48 +0100 (CET)
Subject: [Lxml-checkins] r51474 - in lxml/trunk: . doc
Message-ID: <20080214081948.23218168412@codespeak.net>
Author: scoder
Date: Thu Feb 14 09:19:47 2008
New Revision: 51474
Modified:
lxml/trunk/ (props changed)
lxml/trunk/doc/FAQ.txt
Log:
r3498 at delle: sbehnel | 2008-02-14 09:18:57 +0100
link to a new lxml tutorial
Modified: lxml/trunk/doc/FAQ.txt
==============================================================================
--- lxml/trunk/doc/FAQ.txt (original)
+++ lxml/trunk/doc/FAQ.txt Thu Feb 14 09:19:47 2008
@@ -57,19 +57,23 @@
Is there a tutorial?
--------------------
-Read the `lxml.etree Tutorial`_. While this is still work in progress (just
-as any good documentation), it provides an overview of the most important
-concepts in ``lxml.etree``. If you want to help out, the tutorial is a very
-good place to start.
+Read the `lxml.etree Tutorial`_. While this is still work in progress
+(just as any good documentation), it provides an overview of the most
+important concepts in ``lxml.etree``. If you want to help out,
+improving the tutorial is a very good place to start.
There is also a `tutorial for ElementTree`_ which works for ``lxml.etree``.
The `API documentation`_ also contains many examples for ``lxml.etree``. To
learn using ``lxml.objectify``, read the `objectify documentation`_.
+John Shipman has written another tutorial called `Python XML
+processing with lxml`_ that contains lots of examples.
+
.. _`lxml.etree Tutorial`: tutorial.html
.. _`tutorial for ElementTree`: http://effbot.org/zone/element.htm
.. _`API documentation`: api.html
.. _`objectify documentation`: objectify.html
+.. _`Python XML processing with lxml`: http://www.nmt.edu/tcc/help/pubs/pylxml/
Where can I find more documentation about lxml?
From scoder at codespeak.net Thu Feb 14 09:44:26 2008
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Thu, 14 Feb 2008 09:44:26 +0100 (CET)
Subject: [Lxml-checkins] r51475 - in lxml/trunk: . src/lxml
Message-ID: <20080214084426.E809E168412@codespeak.net>
Author: scoder
Date: Thu Feb 14 09:44:25 2008
New Revision: 51475
Modified:
lxml/trunk/ (props changed)
lxml/trunk/src/lxml/lxml.pyclasslookup.pyx
Log:
r3501 at delle: sbehnel | 2008-02-14 09:30:32 +0100
use 'cpdef' for internally used method in lxml.pyclasslookup
Modified: lxml/trunk/src/lxml/lxml.pyclasslookup.pyx
==============================================================================
--- lxml/trunk/src/lxml/lxml.pyclasslookup.pyx (original)
+++ lxml/trunk/src/lxml/lxml.pyclasslookup.pyx Thu Feb 14 09:44:25 2008
@@ -207,7 +207,7 @@
self._assertNode()
return cetree.collectAttributes(self._c_node, 3)
- def getchildren(self):
+ cpdef getchildren(self):
"""Returns all subelements. The elements are returned in document
order.
"""
From scoder at codespeak.net Thu Feb 14 15:52:31 2008
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Thu, 14 Feb 2008 15:52:31 +0100 (CET)
Subject: [Lxml-checkins] r51487 - in lxml/branch/lxml-2.0: . doc src/lxml
src/lxml/html src/lxml/html/tests src/lxml/tests
Message-ID: <20080214145231.26BA8168412@codespeak.net>
Author: scoder
Date: Thu Feb 14 15:52:28 2008
New Revision: 51487
Added:
lxml/branch/lxml-2.0/src/lxml/html/_setmixin.py
- copied unchanged from r51486, lxml/trunk/src/lxml/html/_setmixin.py
Removed:
lxml/branch/lxml-2.0/src/lxml/html/setmixin.py
lxml/branch/lxml-2.0/src/lxml/htmlbuilder.py
Modified:
lxml/branch/lxml-2.0/CHANGES.txt
lxml/branch/lxml-2.0/Makefile
lxml/branch/lxml-2.0/doc/FAQ.txt
lxml/branch/lxml-2.0/doc/build.txt
lxml/branch/lxml-2.0/doc/main.txt
lxml/branch/lxml-2.0/doc/performance.txt
lxml/branch/lxml-2.0/setup.py
lxml/branch/lxml-2.0/setupinfo.py
lxml/branch/lxml-2.0/src/lxml/ElementInclude.py
lxml/branch/lxml-2.0/src/lxml/builder.py
lxml/branch/lxml-2.0/src/lxml/classlookup.pxi
lxml/branch/lxml-2.0/src/lxml/cssselect.py
lxml/branch/lxml-2.0/src/lxml/docloader.pxi
lxml/branch/lxml-2.0/src/lxml/doctestcompare.py
lxml/branch/lxml-2.0/src/lxml/dtd.pxi
lxml/branch/lxml-2.0/src/lxml/extensions.pxi
lxml/branch/lxml-2.0/src/lxml/html/__init__.py
lxml/branch/lxml-2.0/src/lxml/html/builder.py
lxml/branch/lxml-2.0/src/lxml/html/clean.py
lxml/branch/lxml-2.0/src/lxml/html/diff.py
lxml/branch/lxml-2.0/src/lxml/html/tests/test_diff.txt
lxml/branch/lxml-2.0/src/lxml/html/tests/test_forms.txt
lxml/branch/lxml-2.0/src/lxml/html/usedoctest.py
lxml/branch/lxml-2.0/src/lxml/iterparse.pxi
lxml/branch/lxml-2.0/src/lxml/lxml.etree.pyx
lxml/branch/lxml-2.0/src/lxml/lxml.objectify.pyx
lxml/branch/lxml-2.0/src/lxml/lxml.pyclasslookup.pyx
lxml/branch/lxml-2.0/src/lxml/nsclasses.pxi
lxml/branch/lxml-2.0/src/lxml/objectpath.pxi
lxml/branch/lxml-2.0/src/lxml/parser.pxi
lxml/branch/lxml-2.0/src/lxml/relaxng.pxi
lxml/branch/lxml-2.0/src/lxml/sax.py
lxml/branch/lxml-2.0/src/lxml/schematron.pxi
lxml/branch/lxml-2.0/src/lxml/tests/__init__.py
lxml/branch/lxml-2.0/src/lxml/tests/common_imports.py
lxml/branch/lxml-2.0/src/lxml/tests/test_elementtree.py
lxml/branch/lxml-2.0/src/lxml/tests/test_objectify.py
lxml/branch/lxml-2.0/src/lxml/tests/test_pyclasslookup.py
lxml/branch/lxml-2.0/src/lxml/usedoctest.py
lxml/branch/lxml-2.0/src/lxml/xinclude.pxi
lxml/branch/lxml-2.0/src/lxml/xmlerror.pxi
lxml/branch/lxml-2.0/src/lxml/xmlid.pxi
lxml/branch/lxml-2.0/src/lxml/xmlschema.pxi
lxml/branch/lxml-2.0/src/lxml/xpath.pxi
lxml/branch/lxml-2.0/src/lxml/xslt.pxi
lxml/branch/lxml-2.0/version.txt
Log:
trunk merge
Modified: lxml/branch/lxml-2.0/CHANGES.txt
==============================================================================
--- lxml/branch/lxml-2.0/CHANGES.txt (original)
+++ lxml/branch/lxml-2.0/CHANGES.txt Thu Feb 14 15:52:28 2008
@@ -2,6 +2,46 @@
lxml changelog
==============
+2.0.1 (2008-02-13)
+==================
+
+Features added
+--------------
+
+* Child iteration in ``lxml.pyclasslookup``.
+
+* Loads of new docstrings reflect the signature of functions and
+ methods to make them visible in API docs and ``help()``
+
+Bugs fixed
+----------
+
+* The module ``lxml.html.builder`` was duplicated as
+ ``lxml.htmlbuilder``
+
+* Form elements would return None for ``form.fields.keys()`` if there
+ was an unnamed input field. Now unnamed input fields are completely
+ ignored.
+
+* Setting an element slice in objectify could insert slice-overlapping
+ elements at the wrong position.
+
+Other changes
+-------------
+
+* The generated API documentation was cleaned up and disburdened from
+ non-public classes etc.
+
+* The previously public module ``lxml.html.setmixin`` was renamed to
+ ``lxml.html._setmixin`` as it is not an official part of lxml. If
+ you want to use it, feel free to copy it over to your own source
+ base.
+
+* Passing ``--with-xslt-config=/path/to/xslt-config`` to setup.py will
+ override the ``xslt-config`` script that is used to determine the C
+ compiler options.
+
+
2.0 (2008-02-01)
================
Modified: lxml/branch/lxml-2.0/Makefile
==============================================================================
--- lxml/branch/lxml-2.0/Makefile (original)
+++ lxml/branch/lxml-2.0/Makefile Thu Feb 14 15:52:28 2008
@@ -42,10 +42,11 @@
rm -fr doc/html/api
@[ -x "`which epydoc`" ] \
&& (cd src && echo "Generating API docs ..." && \
- PYTHONPATH=. epydoc -v -o ../doc/html/api --name lxml --url http://codespeak.net/lxml/ lxml/) \
+ PYTHONPATH=. epydoc -v --docformat "restructuredtext en" \
+ -o ../doc/html/api --no-private --exclude='[.]html[.]tests|[.]_' \
+ --name lxml --url http://codespeak.net/lxml/ lxml/) \
|| (echo "not generating epydoc API documentation")
-# XXX What should the default be?
test: test_inplace
valtest: valgrind_test_inplace
Modified: lxml/branch/lxml-2.0/doc/FAQ.txt
==============================================================================
--- lxml/branch/lxml-2.0/doc/FAQ.txt (original)
+++ lxml/branch/lxml-2.0/doc/FAQ.txt Thu Feb 14 15:52:28 2008
@@ -57,19 +57,23 @@
Is there a tutorial?
--------------------
-Read the `lxml.etree Tutorial`_. While this is still work in progress (just
-as any good documentation), it provides an overview of the most important
-concepts in ``lxml.etree``. If you want to help out, the tutorial is a very
-good place to start.
+Read the `lxml.etree Tutorial`_. While this is still work in progress
+(just as any good documentation), it provides an overview of the most
+important concepts in ``lxml.etree``. If you want to help out,
+improving the tutorial is a very good place to start.
There is also a `tutorial for ElementTree`_ which works for ``lxml.etree``.
The `API documentation`_ also contains many examples for ``lxml.etree``. To
learn using ``lxml.objectify``, read the `objectify documentation`_.
+John Shipman has written another tutorial called `Python XML
+processing with lxml`_ that contains lots of examples.
+
.. _`lxml.etree Tutorial`: tutorial.html
.. _`tutorial for ElementTree`: http://effbot.org/zone/element.htm
.. _`API documentation`: api.html
.. _`objectify documentation`: objectify.html
+.. _`Python XML processing with lxml`: http://www.nmt.edu/tcc/help/pubs/pylxml/
Where can I find more documentation about lxml?
@@ -116,10 +120,11 @@
Also note that the compatibility to the ElementTree library does not
require projects to set a hard dependency on lxml - as long as they do
-not need lxml's enhanced feature set.
+not take advantage of lxml's enhanced feature set.
* cssutils_, a CSS parser and toolkit, can be used with ``lxml.cssselect``
* Deliverance_, a content theming tool
+* `Enfold Proxy 4`_, a web server accelerator with on-the-fly XSLT processing
* Inteproxy_, a secure HTTP proxy
* lwebstring_, an XML template engine
* OpenXMLlib_, a library for handling OpenXML document meta data
@@ -136,6 +141,7 @@
.. _cssutils: http://code.google.com/p/cssutils/source/browse/trunk/examples/style.py?r=917
.. _Deliverance: http://www.openplans.org/projects/deliverance/project-home
+.. _`Enfold Proxy 4`: http://www.enfoldsystems.com/Products/Proxy/4
.. _gocept.lxml: http://pypi.python.org/pypi/gocept.lxml
.. _Inteproxy: http://lists.wald.intevation.org/pipermail/inteproxy-devel/2007-February/000000.html
.. _lwebstring: http://pypi.python.org/pypi/lwebstring
Modified: lxml/branch/lxml-2.0/doc/build.txt
==============================================================================
--- lxml/branch/lxml-2.0/doc/build.txt (original)
+++ lxml/branch/lxml-2.0/doc/build.txt Thu Feb 14 15:52:28 2008
@@ -23,22 +23,31 @@
Cython
------
-The lxml.etree and lxml.objectify modules are written in Cython_. Since we
-distribute the Cython-generated .c files with lxml releases, however, you do
-not need Cython to build lxml from the normal release sources.
-
+.. _EasyInstall: http://peak.telecommunity.com/DevCenter/EasyInstall
.. _Cython: http://www.cython.org
-If you are interested in building lxml from a Subversion checkout or want to
-be an lxml developer, you do need a working Cython installation. You can use
-EasyInstall_ to install it::
+The lxml.etree and lxml.objectify modules are written in Cython_.
+Since we distribute the Cython-generated .c files with lxml releases,
+however, you do not need Cython to build lxml from the normal release
+sources. We even encourage you to *not install Cython* for a normal
+release build, as the generated C code can vary quite heavily between
+Cython versions, which may or may not generate correct code for lxml.
+The pre-generated release sources were tested and therefore are known
+to work.
+
+So, if you want a reliable build of lxml, we suggest that you a) use a
+source release of lxml and b) disable or uninstall Cython for the
+build.
+
+*Only* if you are interested in building lxml from a Subversion
+checkout (e.g. to test a bug fix that has not been released yet) or if
+you want to be an lxml developer, then you do need a working Cython
+installation. You can use EasyInstall_ to install it::
- easy_install Cython==0.9.6.11
-
-.. _EasyInstall: http://peak.telecommunity.com/DevCenter/EasyInstall
+ easy_install Cython==0.9.6.11b
-lxml currently requires at least Cython 0.9.6.11, but later versions
-should work.
+lxml currently requires Cython 0.9.6.11b; later versions have not been
+tested.
Subversion
@@ -79,9 +88,13 @@
If you get errors about missing header files (e.g. ``libxml/xmlversion.h``)
then you need to make sure the development packages of both libxml2
-and libxslt are properly installed. If this doesn't help, you may
-have to add the location of the header files to the include path
-like::
+and libxslt are properly installed. Try passing the following option to
+setup.py to make sure the right config is found::
+
+ python setup.py build --with-xslt-config=/path/to/xslt-config
+
+If this doesn't help, you may have to add the location of the header
+files to the include path like::
python setup.py build_ext -i -I /usr/include/libxml2
@@ -165,15 +178,28 @@
an older version. The result can be segfaults on this platform that are hard
to track down.
-To make sure the newer libxml2 and libxslt versions are used (e.g. under
-fink), you should add the directory where you installed the libraries to the
-``DYLD_LIBRARY_PATH`` environment variable. This seems to fix a lot of
-problems for users.
-
-Alternatively, you can build lxml statically. A way to do this on MS Windows
-is described in the next section, but it should be easy to adapt it for
-Mac-OS. That way, you can always be sure you use the versions you compiled
-lxml with, regardless of the runtime environement.
+To make sure the newer libxml2 and libxslt versions (e.g. those
+provided by fink or macports) are used at *build time*, you must take
+care that the script ``xslt-config`` is found from the newly installed
+version when running the build setup. The system libraries also
+provide this script, but the new one must come first in the PATH. The
+best way to make sure the right version is used is by passing the path
+to the script as an option to setup.py::
+
+ python setup.py build --with-xslt-config=/path/to/xslt-config
+
+To make sure the newer libxml2 and libxslt versions are used at
+*runtime*, you should add *all* directories where the newer libraries
+are installed (i.e. libxml2, libxslt and libexslt) to the
+``DYLD_LIBRARY_PATH`` environment variable when you use lxml (i.e. not
+only at build time). This seems to fix a lot of problems for users.
+
+Please read this thread about `experiences with MacOS-X`_ if you
+encounter problems. It also has a `buildout for lxml`_ that you can
+use.
+
+.. _`experiences with MacOS-X`: http://thread.gmane.org/gmane.comp.python.lxml.devel/3290/focus=3290
+.. _`buildout for lxml`: http://thread.gmane.org/gmane.comp.python.lxml.devel/3290/focus=3297
Static linking on Windows
Modified: lxml/branch/lxml-2.0/doc/main.txt
==============================================================================
--- lxml/branch/lxml-2.0/doc/main.txt (original)
+++ lxml/branch/lxml-2.0/doc/main.txt Thu Feb 14 15:52:28 2008
@@ -145,8 +145,8 @@
.. _`lxml at the Python Package Index`: http://pypi.python.org/pypi/lxml/
.. _`this key`: pubkey.asc
-The latest version is `lxml 2.0`_, released 2008-02-01
-(`changes for 2.0`_). `Older versions`_ are listed below.
+The latest version is `lxml 2.0.1`_, released 2008-02-13
+(`changes for 2.0.1`_). `Older versions`_ are listed below.
.. _`Older versions`: #old-versions
@@ -206,21 +206,7 @@
Old Versions
------------
-* `lxml 2.0beta2`_, released 2008-01-26 (`changes for 2.0beta2`_)
-
-* `lxml 2.0beta1`_, released 2008-01-11 (`changes for 2.0beta1`_)
-
-* `lxml 2.0alpha6`_, released 2007-12-19 (`changes for 2.0alpha6`_)
-
-* `lxml 2.0alpha5`_, released 2007-11-24 (`changes for 2.0alpha5`_)
-
-* `lxml 2.0alpha4`_, released 2007-10-07 (`changes for 2.0alpha4`_)
-
-* `lxml 2.0alpha3`_, released 2007-09-26 (`changes for 2.0alpha3`_)
-
-* `lxml 2.0alpha2`_, released 2007-09-15 (`changes for 2.0alpha2`_)
-
-* `lxml 2.0alpha1`_, released 2007-09-02 (`changes for 2.0alpha1`_)
+* `lxml 2.0`_, released 2008-02-01 (`changes for 2.0`_)
* `lxml 1.3.6`_, released 2007-10-29 (`changes for 1.3.6`_)
@@ -272,15 +258,8 @@
* `lxml 0.5`_, released 2005-04-08
+.. _`lxml 2.0.1`: lxml-2.0.1.tgz
.. _`lxml 2.0`: lxml-2.0.tgz
-.. _`lxml 2.0beta2`: lxml-2.0beta2.tgz
-.. _`lxml 2.0beta1`: lxml-2.0beta1.tgz
-.. _`lxml 2.0alpha6`: lxml-2.0alpha6.tgz
-.. _`lxml 2.0alpha5`: lxml-2.0alpha5.tgz
-.. _`lxml 2.0alpha4`: lxml-2.0alpha4.tgz
-.. _`lxml 2.0alpha3`: lxml-2.0alpha3.tgz
-.. _`lxml 2.0alpha2`: lxml-2.0alpha2.tgz
-.. _`lxml 2.0alpha1`: lxml-2.0alpha1.tgz
.. _`lxml 1.3.6`: lxml-1.3.6.tgz
.. _`lxml 1.3.5`: lxml-1.3.5.tgz
.. _`lxml 1.3.4`: lxml-1.3.4.tgz
@@ -306,15 +285,8 @@
.. _`lxml 0.5.1`: lxml-0.5.1.tgz
.. _`lxml 0.5`: lxml-0.5.tgz
+.. _`changes for 2.0.1`: changes-2.0.1.html
.. _`changes for 2.0`: changes-2.0.html
-.. _`changes for 2.0beta2`: changes-2.0beta2.html
-.. _`changes for 2.0beta1`: changes-2.0beta1.html
-.. _`changes for 2.0alpha6`: changes-2.0alpha6.html
-.. _`changes for 2.0alpha5`: changes-2.0alpha5.html
-.. _`changes for 2.0alpha4`: changes-2.0alpha4.html
-.. _`changes for 2.0alpha3`: changes-2.0alpha3.html
-.. _`changes for 2.0alpha2`: changes-2.0alpha2.html
-.. _`changes for 2.0alpha1`: changes-2.0alpha1.html
.. _`changes for 1.3.6`: changes-1.3.6.html
.. _`changes for 1.3.5`: changes-1.3.5.html
.. _`changes for 1.3.4`: changes-1.3.4.html
Modified: lxml/branch/lxml-2.0/doc/performance.txt
==============================================================================
--- lxml/branch/lxml-2.0/doc/performance.txt (original)
+++ lxml/branch/lxml-2.0/doc/performance.txt Thu Feb 14 15:52:28 2008
@@ -71,8 +71,8 @@
a specific part of the API yourself, please consider sending it to the lxml
mailing list.
-The timings cited below compare lxml 2.0alpha (with libxml2 2.6.30) to
-the December 2007 SVN trunk versions of ElementTree (1.3) and
+The timings cited below compare lxml 2.0 final (with libxml2 2.6.31)
+to the January 2008 SVN trunk versions of ElementTree (1.3alpha) and
cElementTree (1.2.7). They were run single-threaded on a 1.8GHz Intel
Core Duo machine under Ubuntu Linux 7.10 (Gutsy). The C libraries
were compiled with the same platform specific optimisation flags. The
@@ -117,23 +117,23 @@
1.2, lxml is still more than 5 times as fast as the much improved
ElementTree 1.3::
- lxe: tostring_utf16 (SATR T1) 23.4821 msec/pass
+ lxe: tostring_utf16 (SATR T1) 19.0921 msec/pass
cET: tostring_utf16 (SATR T1) 129.8430 msec/pass
ET : tostring_utf16 (SATR T1) 136.1301 msec/pass
- lxe: tostring_utf16 (UATR T1) 23.4859 msec/pass
+ lxe: tostring_utf16 (UATR T1) 20.4630 msec/pass
cET: tostring_utf16 (UATR T1) 130.1570 msec/pass
ET : tostring_utf16 (UATR T1) 136.3101 msec/pass
- lxe: tostring_utf16 (S-TR T2) 24.2729 msec/pass
+ lxe: tostring_utf16 (S-TR T2) 18.8632 msec/pass
cET: tostring_utf16 (S-TR T2) 136.9388 msec/pass
ET : tostring_utf16 (S-TR T2) 143.9550 msec/pass
- lxe: tostring_utf8 (S-TR T2) 18.4860 msec/pass
+ lxe: tostring_utf8 (S-TR T2) 14.4310 msec/pass
cET: tostring_utf8 (S-TR T2) 137.0859 msec/pass
ET : tostring_utf8 (S-TR T2) 144.3110 msec/pass
- lxe: tostring_utf8 (U-TR T3) 2.7399 msec/pass
+ lxe: tostring_utf8 (U-TR T3) 2.6381 msec/pass
cET: tostring_utf8 (U-TR T3) 52.1040 msec/pass
ET : tostring_utf8 (U-TR T3) 53.1070 msec/pass
@@ -205,10 +205,10 @@
(given in seconds)::
lxe: -- S- U- -A SA UA
- T1: 0.0914 0.0875 0.0872 0.0892 0.0882 0.0900
- T2: 0.0894 0.0897 0.0892 0.0988 0.0978 0.0974
- T3: 0.0219 0.0194 0.0189 0.0570 0.0570 0.0573
- T4: 0.0004 0.0003 0.0003 0.0012 0.0012 0.0012
+ T1: 0.0783 0.0777 0.0774 0.0787 0.0781 0.0783
+ T2: 0.0799 0.0796 0.0799 0.0879 0.0882 0.0886
+ T3: 0.0245 0.0216 0.0217 0.0577 0.0575 0.0572
+ T4: 0.0003 0.0003 0.0003 0.0011 0.0011 0.0011
cET: -- S- U- -A SA UA
T1: 0.0272 0.0264 0.0267 0.0268 0.0261 0.0265
T2: 0.0280 0.0274 0.0273 0.0273 0.0276 0.0275
@@ -235,21 +235,21 @@
create a shallow copy of their list of children, lxml has to create a
Python object for each child and collect them in a list::
- lxe: root_list_children (--TR T1) 0.0169 msec/pass
+ lxe: root_list_children (--TR T1) 0.0160 msec/pass
cET: root_list_children (--TR T1) 0.0081 msec/pass
ET : root_list_children (--TR T1) 0.0541 msec/pass
- lxe: root_list_children (--TR T2) 0.2339 msec/pass
+ lxe: root_list_children (--TR T2) 0.2100 msec/pass
cET: root_list_children (--TR T2) 0.0319 msec/pass
ET : root_list_children (--TR T2) 0.4420 msec/pass
This handicap is also visible when accessing single children::
- lxe: first_child (--TR T2) 0.2470 msec/pass
+ lxe: first_child (--TR T2) 0.2429 msec/pass
cET: first_child (--TR T2) 0.2170 msec/pass
ET : first_child (--TR T2) 0.9968 msec/pass
- lxe: last_child (--TR T1) 0.2482 msec/pass
+ lxe: last_child (--TR T1) 0.2470 msec/pass
cET: last_child (--TR T1) 0.2291 msec/pass
ET : last_child (--TR T1) 0.9830 msec/pass
@@ -258,11 +258,11 @@
The data structure used by libxml2 is a linked tree, and thus, a
linked list of children::
- lxe: middle_child (--TR T1) 0.2789 msec/pass
+ lxe: middle_child (--TR T1) 0.2759 msec/pass
cET: middle_child (--TR T1) 0.2229 msec/pass
ET : middle_child (--TR T1) 1.0030 msec/pass
- lxe: middle_child (--TR T2) 1.9610 msec/pass
+ lxe: middle_child (--TR T2) 1.7071 msec/pass
cET: middle_child (--TR T2) 0.2229 msec/pass
ET : middle_child (--TR T2) 0.9930 msec/pass
@@ -274,7 +274,7 @@
in. This results in a major performance difference for creating independent
Elements that end up in independently created documents::
- lxe: create_elements (--TC T2) 3.1691 msec/pass
+ lxe: create_elements (--TC T2) 2.8961 msec/pass
cET: create_elements (--TC T2) 0.1929 msec/pass
ET : create_elements (--TC T2) 1.3590 msec/pass
@@ -282,11 +282,11 @@
are supposed to end up in, either as SubElements of an Element or using the
explicit ``Element.makeelement()`` call::
- lxe: makeelement (--TC T2) 2.2650 msec/pass
+ lxe: makeelement (--TC T2) 1.9000 msec/pass
cET: makeelement (--TC T2) 0.3211 msec/pass
ET : makeelement (--TC T2) 1.6358 msec/pass
- lxe: create_subelements (--TC T2) 1.9531 msec/pass
+ lxe: create_subelements (--TC T2) 1.7891 msec/pass
cET: create_subelements (--TC T2) 0.2351 msec/pass
ET : create_subelements (--TC T2) 3.2270 msec/pass
@@ -305,11 +305,11 @@
The following benchmark appends all root children of the second tree to the
root of the first tree::
- lxe: append_from_document (--TR T1,T2) 3.8681 msec/pass
+ lxe: append_from_document (--TR T1,T2) 3.3841 msec/pass
cET: append_from_document (--TR T1,T2) 0.2699 msec/pass
ET : append_from_document (--TR T1,T2) 1.2650 msec/pass
- lxe: append_from_document (--TR T3,T4) 0.0570 msec/pass
+ lxe: append_from_document (--TR T3,T4) 0.0441 msec/pass
cET: append_from_document (--TR T3,T4) 0.0169 msec/pass
ET : append_from_document (--TR T3,T4) 0.0820 msec/pass
@@ -322,20 +322,20 @@
This difference is not always as visible, but applies to most parts of the
API, like inserting newly created elements::
- lxe: insert_from_document (--TR T1,T2) 5.8019 msec/pass
+ lxe: insert_from_document (--TR T1,T2) 5.7020 msec/pass
cET: insert_from_document (--TR T1,T2) 0.4041 msec/pass
ET : insert_from_document (--TR T1,T2) 1.4789 msec/pass
or replacing the child slice by a newly created element::
- lxe: replace_children_element (--TC T1) 0.2480 msec/pass
+ lxe: replace_children_element (--TC T1) 0.2210 msec/pass
cET: replace_children_element (--TC T1) 0.0238 msec/pass
ET : replace_children_element (--TC T1) 0.1600 msec/pass
as opposed to replacing the slice with an existing element from the
same document::
- lxe: replace_children (--TC T1) 0.0188 msec/pass
+ lxe: replace_children (--TC T1) 0.0179 msec/pass
cET: replace_children (--TC T1) 0.0119 msec/pass
ET : replace_children (--TC T1) 0.0739 msec/pass
@@ -347,16 +347,16 @@
Deep copying a tree is fast in lxml::
- lxe: deepcopy_all (--TR T1) 10.9420 msec/pass
+ lxe: deepcopy_all (--TR T1) 9.7558 msec/pass
cET: deepcopy_all (--TR T1) 120.6188 msec/pass
ET : deepcopy_all (--TR T1) 902.6880 msec/pass
- lxe: deepcopy_all (-ATR T2) 12.5830 msec/pass
+ lxe: deepcopy_all (-ATR T2) 12.3210 msec/pass
cET: deepcopy_all (-ATR T2) 136.9810 msec/pass
ET : deepcopy_all (-ATR T2) 944.2801 msec/pass
- lxe: deepcopy_all (S-TR T3) 4.1170 msec/pass
- cET: deepcopy_all (S-TR T3) 36.1221 msec/pass
+ lxe: deepcopy_all (S-TR T3) 8.3981 msec/pass
+ cET: deepcopy_all (S-TR T3) 35.6541 msec/pass
ET : deepcopy_all (S-TR T3) 221.6041 msec/pass
So, for example, if you have a database-like scenario where you parse in a
@@ -372,37 +372,37 @@
especially if few elements are of interest or the target element tag name is
known, lxml is a good choice::
- lxe: getiterator_all (--TR T1) 5.8582 msec/pass
+ lxe: getiterator_all (--TR T1) 5.7251 msec/pass
cET: getiterator_all (--TR T1) 39.9489 msec/pass
ET : getiterator_all (--TR T1) 23.0000 msec/pass
- lxe: getiterator_islice (--TR T2) 0.0780 msec/pass
+ lxe: getiterator_islice (--TR T2) 0.0830 msec/pass
cET: getiterator_islice (--TR T2) 0.3440 msec/pass
ET : getiterator_islice (--TR T2) 0.2429 msec/pass
- lxe: getiterator_tag (--TR T2) 0.3119 msec/pass
+ lxe: getiterator_tag (--TR T2) 0.3011 msec/pass
cET: getiterator_tag (--TR T2) 14.1001 msec/pass
ET : getiterator_tag (--TR T2) 7.4241 msec/pass
- lxe: getiterator_tag_all (--TR T2) 0.6540 msec/pass
+ lxe: getiterator_tag_all (--TR T2) 0.6340 msec/pass
cET: getiterator_tag_all (--TR T2) 40.7901 msec/pass
ET : getiterator_tag_all (--TR T2) 21.0390 msec/pass
This translates directly into similar timings for ``Element.findall()``::
- lxe: findall (--TR T2) 8.1239 msec/pass
+ lxe: findall (--TR T2) 7.8950 msec/pass
cET: findall (--TR T2) 44.5340 msec/pass
ET : findall (--TR T2) 27.1149 msec/pass
- lxe: findall (--TR T3) 1.6870 msec/pass
+ lxe: findall (--TR T3) 1.7281 msec/pass
cET: findall (--TR T3) 12.9611 msec/pass
ET : findall (--TR T3) 8.6131 msec/pass
- lxe: findall_tag (--TR T2) 0.7660 msec/pass
+ lxe: findall_tag (--TR T2) 0.7720 msec/pass
cET: findall_tag (--TR T2) 40.6358 msec/pass
ET : findall_tag (--TR T2) 21.4581 msec/pass
- lxe: findall_tag (--TR T3) 0.2160 msec/pass
+ lxe: findall_tag (--TR T3) 0.2050 msec/pass
cET: findall_tag (--TR T3) 9.6831 msec/pass
ET : findall_tag (--TR T3) 5.2109 msec/pass
@@ -420,38 +420,38 @@
of the lxml API you use. The most straight forward way is to call the
``xpath()`` method on an Element or ElementTree::
- lxe: xpath_method (--TC T1) 1.8251 msec/pass
- lxe: xpath_method (--TC T2) 23.3159 msec/pass
- lxe: xpath_method (--TC T3) 0.1378 msec/pass
- lxe: xpath_method (--TC T4) 1.1270 msec/pass
+ lxe: xpath_method (--TC T1) 1.7459 msec/pass
+ lxe: xpath_method (--TC T2) 22.0850 msec/pass
+ lxe: xpath_method (--TC T3) 0.1309 msec/pass
+ lxe: xpath_method (--TC T4) 1.0772 msec/pass
This is well suited for testing and when the XPath expressions are as diverse
as the trees they are called on. However, if you have a single XPath
expression that you want to apply to a larger number of different elements,
the ``XPath`` class is the most efficient way to do it::
- lxe: xpath_class (--TC T1) 0.6981 msec/pass
- lxe: xpath_class (--TC T2) 3.6111 msec/pass
- lxe: xpath_class (--TC T3) 0.0591 msec/pass
- lxe: xpath_class (--TC T4) 0.1979 msec/pass
+ lxe: xpath_class (--TC T1) 0.6740 msec/pass
+ lxe: xpath_class (--TC T2) 3.1760 msec/pass
+ lxe: xpath_class (--TC T3) 0.0548 msec/pass
+ lxe: xpath_class (--TC T4) 0.1700 msec/pass
Note that this still allows you to use variables in the expression, so you can
parse it once and then adapt it through variables at call time. In other
cases, where you have a fixed Element or ElementTree and want to run different
expressions on it, you should consider the ``XPathEvaluator``::
- lxe: xpath_element (--TR T1) 0.4342 msec/pass
- lxe: xpath_element (--TR T2) 11.9958 msec/pass
- lxe: xpath_element (--TR T3) 0.1690 msec/pass
- lxe: xpath_element (--TR T4) 0.3510 msec/pass
+ lxe: xpath_element (--TR T1) 0.4151 msec/pass
+ lxe: xpath_element (--TR T2) 11.6129 msec/pass
+ lxe: xpath_element (--TR T3) 0.1299 msec/pass
+ lxe: xpath_element (--TR T4) 0.3409 msec/pass
While it looks slightly slower, creating an XPath object for each of the
expressions generates a much higher overhead here::
- lxe: xpath_class_repeat (--TC T1) 1.7619 msec/pass
- lxe: xpath_class_repeat (--TC T2) 21.9102 msec/pass
- lxe: xpath_class_repeat (--TC T3) 0.1330 msec/pass
- lxe: xpath_class_repeat (--TC T4) 1.0631 msec/pass
+ lxe: xpath_class_repeat (--TC T1) 1.6699 msec/pass
+ lxe: xpath_class_repeat (--TC T2) 20.4420 msec/pass
+ lxe: xpath_class_repeat (--TC T3) 0.1230 msec/pass
+ lxe: xpath_class_repeat (--TC T4) 0.9859 msec/pass
A longer example
@@ -608,21 +608,21 @@
tree. It avoids step-by-step Python element instantiations along the path,
which can substantially improve the access time::
- lxe: attribute (--TR T1) 9.8128 msec/pass
- lxe: attribute (--TR T2) 53.2899 msec/pass
- lxe: attribute (--TR T4) 9.6800 msec/pass
-
- lxe: objectpath (--TR T1) 5.4898 msec/pass
- lxe: objectpath (--TR T2) 48.4819 msec/pass
- lxe: objectpath (--TR T4) 5.3761 msec/pass
-
- lxe: attributes_deep (--TR T1) 56.3290 msec/pass
- lxe: attributes_deep (--TR T2) 62.4361 msec/pass
- lxe: attributes_deep (--TR T4) 15.8000 msec/pass
-
- lxe: objectpath_deep (--TR T1) 49.0060 msec/pass
- lxe: objectpath_deep (--TR T2) 52.5169 msec/pass
- lxe: objectpath_deep (--TR T4) 7.1371 msec/pass
+ lxe: attribute (--TR T1) 9.4581 msec/pass
+ lxe: attribute (--TR T2) 52.5560 msec/pass
+ lxe: attribute (--TR T4) 9.1729 msec/pass
+
+ lxe: objectpath (--TR T1) 4.8690 msec/pass
+ lxe: objectpath (--TR T2) 47.8780 msec/pass
+ lxe: objectpath (--TR T4) 4.7870 msec/pass
+
+ lxe: attributes_deep (--TR T1) 54.7471 msec/pass
+ lxe: attributes_deep (--TR T2) 62.7451 msec/pass
+ lxe: attributes_deep (--TR T4) 15.1050 msec/pass
+
+ lxe: objectpath_deep (--TR T1) 48.2810 msec/pass
+ lxe: objectpath_deep (--TR T2) 51.3949 msec/pass
+ lxe: objectpath_deep (--TR T4) 6.1419 msec/pass
Note, however, that parsing ObjectPath expressions is not for free either, so
this is most effective for frequently accessing the same element.
@@ -648,17 +648,17 @@
subtrees and elements) to cache, you can trade memory usage against access
speed::
- lxe: attribute_cached (--TR T1) 7.6170 msec/pass
- lxe: attribute_cached (--TR T2) 50.7941 msec/pass
- lxe: attribute_cached (--TR T4) 7.4880 msec/pass
-
- lxe: attributes_deep_cached (--TR T1) 49.9220 msec/pass
- lxe: attributes_deep_cached (--TR T2) 55.9340 msec/pass
- lxe: attributes_deep_cached (--TR T4) 10.0131 msec/pass
-
- lxe: objectpath_deep_cached (--TR T1) 44.9121 msec/pass
- lxe: objectpath_deep_cached (--TR T2) 48.2371 msec/pass
- lxe: objectpath_deep_cached (--TR T4) 3.9630 msec/pass
+ lxe: attribute_cached (--TR T1) 7.5061 msec/pass
+ lxe: attribute_cached (--TR T2) 50.1881 msec/pass
+ lxe: attribute_cached (--TR T4) 7.4170 msec/pass
+
+ lxe: attributes_deep_cached (--TR T1) 48.7239 msec/pass
+ lxe: attributes_deep_cached (--TR T2) 55.2199 msec/pass
+ lxe: attributes_deep_cached (--TR T4) 9.9740 msec/pass
+
+ lxe: objectpath_deep_cached (--TR T1) 43.4160 msec/pass
+ lxe: objectpath_deep_cached (--TR T2) 47.6480 msec/pass
+ lxe: objectpath_deep_cached (--TR T4) 3.4680 msec/pass
Things to note: you cannot currently use ``weakref.WeakKeyDictionary`` objects
for this as lxml's element objects do not support weak references (which are
Modified: lxml/branch/lxml-2.0/setup.py
==============================================================================
--- lxml/branch/lxml-2.0/setup.py (original)
+++ lxml/branch/lxml-2.0/setup.py Thu Feb 14 15:52:28 2008
@@ -76,7 +76,8 @@
It extends the ElementTree API significantly to offer support for XPath,
RelaxNG, XML Schema, XSLT, C14N and much more.
-To contact the project, go to the project home page or see our bug tracker at
+To contact the project, go to the `project home page
+<http://codespeak.net/lxml>`_ or see our bug tracker at
https://launchpad.net/lxml
In case you want to use the current in-development version of lxml, you can
Modified: lxml/branch/lxml-2.0/setupinfo.py
==============================================================================
--- lxml/branch/lxml-2.0/setupinfo.py (original)
+++ lxml/branch/lxml-2.0/setupinfo.py Thu Feb 14 15:52:28 2008
@@ -81,7 +81,7 @@
return static_library_dirs
# filter them from xslt-config --libs
result = []
- possible_library_dirs = flags('xslt-config --libs')
+ possible_library_dirs = flags('libs')
for possible_library_dir in possible_library_dirs:
if possible_library_dir.startswith('-L'):
result.append(possible_library_dir[2:])
@@ -95,7 +95,7 @@
return static_include_dirs
# filter them from xslt-config --cflags
result = []
- possible_include_dirs = flags('xslt-config --cflags')
+ possible_include_dirs = flags('cflags')
for possible_include_dir in possible_include_dirs:
if possible_include_dir.startswith('-I'):
result.append(possible_include_dir[2:])
@@ -114,7 +114,7 @@
return result
# anything from xslt-config --cflags that doesn't start with -I
- possible_cflags = flags('xslt-config --cflags')
+ possible_cflags = flags('cflags')
for possible_cflag in possible_cflags:
if not possible_cflag.startswith('-I'):
result.append(possible_cflag)
@@ -127,8 +127,9 @@
if OPTION_WITHOUT_THREADING:
macros.append(('WITHOUT_THREADING', None))
return macros
-
-def flags(cmd):
+
+def flags(option):
+ cmd = "%s --%s" % (find_xslt_config(), option)
try:
import subprocess
except ImportError:
@@ -145,6 +146,22 @@
print("** make sure the development packages of libxml2 and libxslt are installed **\n")
return str(rf.read()).split()
+XSLT_CONFIG = None
+
+def find_xslt_config():
+ global XSLT_CONFIG
+ if XSLT_CONFIG:
+ return XSLT_CONFIG
+ option = '--with-xslt-config='
+ for arg in sys.argv:
+ if arg.startswith(option):
+ sys.argv.remove(arg)
+ XSLT_CONFIG = arg[len(option):]
+ return XSLT_CONFIG
+ else:
+ XSLT_CONFIG = 'xslt-config'
+ return XSLT_CONFIG
+
def has_option(name):
try:
sys.argv.remove('--%s' % name)
Modified: lxml/branch/lxml-2.0/src/lxml/ElementInclude.py
==============================================================================
--- lxml/branch/lxml-2.0/src/lxml/ElementInclude.py (original)
+++ lxml/branch/lxml-2.0/src/lxml/ElementInclude.py Thu Feb 14 15:52:28 2008
@@ -41,9 +41,14 @@
# OF THIS SOFTWARE.
# --------------------------------------------------------------------
-##
-# Limited XInclude support for the ElementTree package.
-##
+"""
+Limited XInclude support for the ElementTree package.
+
+While lxml.etree has full support for XInclude (see
+`etree.ElementTree.xinclude()`), this module provides a simpler, pure
+Python, ElementTree compatible implementation that supports a simple
+form of custom URL resolvers.
+"""
import copy, etree
from urlparse import urljoin
Modified: lxml/branch/lxml-2.0/src/lxml/builder.py
==============================================================================
--- lxml/branch/lxml-2.0/src/lxml/builder.py (original)
+++ lxml/branch/lxml-2.0/src/lxml/builder.py Thu Feb 14 15:52:28 2008
@@ -33,6 +33,10 @@
# OF THIS SOFTWARE.
# --------------------------------------------------------------------
+"""
+The ``E`` Element factory for generating XML documents.
+"""
+
import etree as ET
try:
Modified: lxml/branch/lxml-2.0/src/lxml/classlookup.pxi
==============================================================================
--- lxml/branch/lxml-2.0/src/lxml/classlookup.pxi (original)
+++ lxml/branch/lxml-2.0/src/lxml/classlookup.pxi Thu Feb 14 15:52:28 2008
@@ -53,7 +53,9 @@
# class to store element class lookup functions
cdef public class ElementClassLookup [ type LxmlElementClassLookupType,
object LxmlElementClassLookup ]:
- """Superclass of Element class lookups.
+ """ElementClassLookup(self)
+
+ Superclass of Element class lookups.
"""
cdef _element_class_lookup_function _lookup_function
def __init__(self):
@@ -62,18 +64,20 @@
cdef public class FallbackElementClassLookup(ElementClassLookup) \
[ type LxmlFallbackElementClassLookupType,
object LxmlFallbackElementClassLookup ]:
- """Superclass of Element class lookups with additional fallback.
+ """FallbackElementClassLookup(self, fallback=None)
+
+ Superclass of Element class lookups with additional fallback.
"""
cdef readonly ElementClassLookup fallback
cdef _element_class_lookup_function _fallback_function
def __init__(self, ElementClassLookup fallback=None):
self._lookup_function = NULL # use default lookup
if fallback is not None:
- self.setFallback(fallback)
+ self._setFallback(fallback)
else:
self._fallback_function = _lookupDefaultElementClass
- def setFallback(self, ElementClassLookup lookup not None):
+ cdef void _setFallback(self, ElementClassLookup lookup):
"""Sets the fallback scheme for this lookup method.
"""
self.fallback = lookup
@@ -81,6 +85,20 @@
if self._fallback_function is NULL:
self._fallback_function = _lookupDefaultElementClass
+ def set_fallback(self, ElementClassLookup lookup not None):
+ """set_fallback(self, lookup)
+
+ Sets the fallback scheme for this lookup method.
+ """
+ self._setFallback(lookup)
+
+ def setFallback(self, ElementClassLookup lookup not None):
+ """Sets the fallback scheme for this lookup method.
+
+ :deprecated: use ``set_fallback()`` instead.
+ """
+ self._setFallback(lookup)
+
cdef object _callFallback(self, _Document doc, xmlNode* c_node):
return self._fallback_function(self.fallback, doc, c_node)
@@ -89,7 +107,8 @@
# Custom Element class lookup schemes
cdef class ElementDefaultClassLookup(ElementClassLookup):
- """Element class lookup scheme that always returns the default Element
+ """ElementDefaultClassLookup(self, element=None, comment=None, pi=None, entity=None)
+ Element class lookup scheme that always returns the default Element
class.
The keyword arguments ``element``, ``comment``, ``pi`` and ``entity``
@@ -163,13 +182,14 @@
assert 0, "Unknown node type: %s" % c_node.type
cdef class AttributeBasedElementClassLookup(FallbackElementClassLookup):
- """Checks an attribute of an Element and looks up the value in a class
- dictionary.
+ """AttributeBasedElementClassLookup(self, attribute_name, class_mapping, fallback=None)
+ Checks an attribute of an Element and looks up the value in a
+ class dictionary.
Arguments:
- * attribute name - '{ns}name' style string
- * class mapping - Python dict mapping attribute values to Element classes
- * fallback - optional fallback lookup mechanism
+ - attribute name - '{ns}name' style string
+ - class mapping - Python dict mapping attribute values to Element classes
+ - fallback - optional fallback lookup mechanism
A None key in the class mapping will be checked if the attribute is
missing.
@@ -207,7 +227,8 @@
cdef class ParserBasedElementClassLookup(FallbackElementClassLookup):
- """Element class lookup based on the XML parser.
+ """ParserBasedElementClassLookup(self, fallback=None)
+ Element class lookup based on the XML parser.
"""
def __init__(self, ElementClassLookup fallback=None):
FallbackElementClassLookup.__init__(self, fallback)
@@ -221,7 +242,8 @@
cdef class CustomElementClassLookup(FallbackElementClassLookup):
- """Element class lookup based on a subclass method.
+ """CustomElementClassLookup(self, fallback=None)
+ Element class lookup based on a subclass method.
You can inherit from this class and override the method::
@@ -240,6 +262,7 @@
self._lookup_function = _custom_class_lookup
def lookup(self, type, doc, namespace, name):
+ "lookup(self, type, doc, namespace, name)"
return None
cdef object _custom_class_lookup(state, _Document doc, xmlNode* c_node):
@@ -291,11 +314,14 @@
LOOKUP_ELEMENT_CLASS = function
def setElementClassLookup(ElementClassLookup lookup = None):
- "@deprecated: use ``set_element_class_lookup(lookup)`` instead"
+ ":deprecated: use ``set_element_class_lookup(lookup)`` instead"
set_element_class_lookup(lookup)
def set_element_class_lookup(ElementClassLookup lookup = None):
- "Set the global default element class lookup method."
+ """set_element_class_lookup(lookup = None)
+
+ Set the global default element class lookup method.
+ """
if lookup is None or lookup._lookup_function is NULL:
_setElementClassLookupFunction(NULL, None)
else:
Modified: lxml/branch/lxml-2.0/src/lxml/cssselect.py
==============================================================================
--- lxml/branch/lxml-2.0/src/lxml/cssselect.py (original)
+++ lxml/branch/lxml-2.0/src/lxml/cssselect.py Thu Feb 14 15:52:28 2008
@@ -1,3 +1,9 @@
+"""CSS Selectors based on XPath.
+
+This module supports selecting XML/HTML tags based on CSS selectors.
+See the `CSSSelector` class for details.
+"""
+
import re
from lxml import etree
@@ -11,7 +17,17 @@
pass
class CSSSelector(etree.XPath):
+ """A CSS selector.
+
+ Usage::
+ >>> from lxml import etree, cssselect
+ >>> select = cssselect.CSSSelector("a tag > child")
+
+ >>> root = etree.XML("<a><b><c/><tag><child>TEXT</child></tag></b></a>")
+ >>> [ el.tag for el in select(root) ]
+ ['child']
+ """
def __init__(self, css):
path = css_to_xpath(css)
etree.XPath.__init__(self, path)
@@ -575,9 +591,8 @@
self.condition = other.condition
class XPathExprOr(XPathExpr):
-
"""
- Represents on |'d expressions. Note that unfortunately it isn't
+ Represents |'d expressions. Note that unfortunately it isn't
the union, it's the sum, so duplicate elements will appear.
"""
Modified: lxml/branch/lxml-2.0/src/lxml/docloader.pxi
==============================================================================
--- lxml/branch/lxml-2.0/src/lxml/docloader.pxi (original)
+++ lxml/branch/lxml-2.0/src/lxml/docloader.pxi Thu Feb 14 15:52:28 2008
@@ -15,7 +15,9 @@
cdef class Resolver:
"This is the base class of all resolvers."
def resolve(self, system_url, public_id, context):
- """Override this method to resolve an external source by
+ """resolve(self, system_url, public_id, context)
+
+ Override this method to resolve an external source by
``system_url`` and ``public_id``. The third argument is an
opaque context object.
@@ -24,7 +26,9 @@
return None
def resolve_empty(self, context):
- """Return an empty input document.
+ """resolve_empty(self, context)
+
+ Return an empty input document.
Pass context as parameter.
"""
@@ -34,7 +38,9 @@
return doc_ref
def resolve_string(self, string, context, *, base_url=None):
- """Return a parsable string as input document.
+ """resolve_string(self, string, context, base_url=None)
+
+ Return a parsable string as input document.
Pass data string and context as parameters.
@@ -49,7 +55,9 @@
return doc_ref
def resolve_filename(self, filename, context):
- """Return the name of a parsable file as input document.
+ """resolve_filename(self, filename, context)
+
+ Return the name of a parsable file as input document.
Pass filename and context as parameters.
"""
@@ -60,7 +68,9 @@
return doc_ref
def resolve_file(self, f, context):
- """Return an open file-like object as input document.
+ """resolve_file(self, f, context)
+
+ Return an open file-like object as input document.
Pass open file and context as parameters.
"""
@@ -83,7 +93,9 @@
self._default_resolver = default_resolver
def add(self, Resolver resolver not None):
- """Register a resolver.
+ """add(self, resolver)
+
+ Register a resolver.
For each requested entity, the 'resolve' method of the resolver will
be called and the result will be passed to the parser. If this method
@@ -94,6 +106,7 @@
self._resolvers.add(resolver)
def remove(self, resolver):
+ "remove(self, resolver)"
self._resolvers.discard(resolver)
cdef _ResolverRegistry _copy(self):
@@ -103,9 +116,11 @@
return registry
def copy(self):
+ "copy(self)"
return self._copy()
def resolve(self, system_url, public_id, context):
+ "resolve(self, system_url, public_id, context)"
for resolver in self._resolvers:
result = resolver.resolve(system_url, public_id, context)
if result is not None:
Modified: lxml/branch/lxml-2.0/src/lxml/doctestcompare.py
==============================================================================
--- lxml/branch/lxml-2.0/src/lxml/doctestcompare.py (original)
+++ lxml/branch/lxml-2.0/src/lxml/doctestcompare.py Thu Feb 14 15:52:28 2008
@@ -1,8 +1,16 @@
"""
lxml-based doctest output comparison.
-To use this you must call ``lxmldoctest.install()``, which will cause
-doctest to use this in all subsequent calls.
+Note: normally, you should just import the `lxml.usedoctest` and
+`lxml.html.usedoctest` modules from within a doctest, instead of this
+one::
+
+ >>> import lxml.usedoctest # for XML output
+
+ >>> import lxml.html.usedoctest # for HTML output
+
+To use this module directly, you must call ``lxmldoctest.install()``,
+which will cause doctest to use this in all subsequent calls.
This changes the way output is checked and comparisons are made for
XML or HTML-like content.
@@ -32,7 +40,7 @@
import doctest
import cgi
-__all__ = ['PARSE_HTML', 'PARSE_XML', 'LXMLOutputChecker',
+__all__ = ['PARSE_HTML', 'PARSE_XML', 'NOPARSE_MARKUP', 'LXMLOutputChecker',
'LHTMLOutputChecker', 'install', 'temp_install']
PARSE_HTML = doctest.register_optionflag('PARSE_HTML')
Modified: lxml/branch/lxml-2.0/src/lxml/dtd.pxi
==============================================================================
--- lxml/branch/lxml-2.0/src/lxml/dtd.pxi (original)
+++ lxml/branch/lxml-2.0/src/lxml/dtd.pxi Thu Feb 14 15:52:28 2008
@@ -20,7 +20,8 @@
# DTD
cdef class DTD(_Validator):
- """A DTD validator.
+ """DTD(self, file=None, external_id=None)
+ A DTD validator.
Can load from filesystem directly given a filename or file-like object.
Alternatively, pass the keyword parameter ``external_id`` to load from a
@@ -56,7 +57,9 @@
tree.xmlFreeDtd(self._c_dtd)
def __call__(self, etree):
- """Validate doc using the DTD.
+ """__call__(self, etree)
+
+ Validate doc using the DTD.
Returns true if the document is valid, false if not.
"""
Modified: lxml/branch/lxml-2.0/src/lxml/extensions.pxi
==============================================================================
--- lxml/branch/lxml-2.0/src/lxml/extensions.pxi (original)
+++ lxml/branch/lxml-2.0/src/lxml/extensions.pxi Thu Feb 14 15:52:28 2008
@@ -326,7 +326,9 @@
self._temp_refs.add((<_Element>o)._doc)
def Extension(module, function_mapping=None, *, ns=None):
- """Build a dictionary of extension functions from the functions
+ """Extension(module, function_mapping=None, ns=None)
+
+ Build a dictionary of extension functions from the functions
defined in a module or the methods of an object.
As second argument, you can pass an additional mapping of
Modified: lxml/branch/lxml-2.0/src/lxml/html/__init__.py
==============================================================================
--- lxml/branch/lxml-2.0/src/lxml/html/__init__.py (original)
+++ lxml/branch/lxml-2.0/src/lxml/html/__init__.py Thu Feb 14 15:52:28 2008
@@ -1,3 +1,6 @@
+"""The ``lxml.html`` tool set for HTML handling.
+"""
+
import threading
import re
import urlparse
@@ -5,7 +8,7 @@
from lxml import etree
from lxml.html import defs
from lxml import cssselect
-from lxml.html.setmixin import SetMixin
+from lxml.html._setmixin import SetMixin
try:
from UserDict import DictMixin
except ImportError:
@@ -818,7 +821,8 @@
def keys(self):
names = sets.Set()
for el in self:
- names.add(el.name)
+ if el.name is not None:
+ names.add(el.name)
return list(names)
def __iter__(self):
Modified: lxml/branch/lxml-2.0/src/lxml/html/builder.py
==============================================================================
--- lxml/branch/lxml-2.0/src/lxml/html/builder.py (original)
+++ lxml/branch/lxml-2.0/src/lxml/html/builder.py Thu Feb 14 15:52:28 2008
@@ -1,5 +1,10 @@
+# --------------------------------------------------------------------
+# The ElementTree toolkit is
+# Copyright (c) 1999-2004 by Fredrik Lundh
+# --------------------------------------------------------------------
+
"""
-HTML specialisation of ``builder.py`` by Fredrik Lundh
+A set of HTML generator tags for building HTML documents.
Usage::
Modified: lxml/branch/lxml-2.0/src/lxml/html/clean.py
==============================================================================
--- lxml/branch/lxml-2.0/src/lxml/html/clean.py (original)
+++ lxml/branch/lxml-2.0/src/lxml/html/clean.py Thu Feb 14 15:52:28 2008
@@ -1,3 +1,9 @@
+"""A cleanup tool for HTML.
+
+Removes unwanted tags and content. See the `Cleaner` class for
+details.
+"""
+
import re
import copy
import urlparse
Modified: lxml/branch/lxml-2.0/src/lxml/html/diff.py
==============================================================================
--- lxml/branch/lxml-2.0/src/lxml/html/diff.py (original)
+++ lxml/branch/lxml-2.0/src/lxml/html/diff.py Thu Feb 14 15:52:28 2008
@@ -738,29 +738,17 @@
If skip_outer is true, then don't serialize the outermost tag
"""
-
- html_xsl = """\
-
-
-
-
-
-
-"""
- transform = etree.XSLT(etree.XML(html_xsl))
assert not isinstance(el, basestring), (
"You should pass in an element, not a string like %r" % el)
- html = str(transform(el))
+ html = etree.tostring(el, method="html", encoding="UTF-8")
if skip_outer:
# Get rid of the extra starting tag:
html = html[html.find('>')+1:]
- if skip_outer:
# Get rid of the extra end tag:
html = html[:html.rfind('<')]
- if skip_outer:
return html.strip()
else:
- return html.lstrip()
+ return html
def _fixup_ins_del_tags(doc):
"""fixup_ins_del_tags that works on an lxml document in-place
Deleted: /lxml/branch/lxml-2.0/src/lxml/html/setmixin.py
==============================================================================
--- /lxml/branch/lxml-2.0/src/lxml/html/setmixin.py Thu Feb 14 15:52:28 2008
+++ (empty file)
@@ -1,115 +0,0 @@
-class SetMixin(object):
-
- """
- Mix-in for sets. You must define __iter__, add, remove
- """
-
- def __len__(self):
- length = 0
- for item in self:
- length += 1
- return length
-
- def __contains__(self, item):
- for has_item in self:
- if item == has_item:
- return True
- return False
-
- def issubset(self, other):
- for item in other:
- if item not in self:
- return False
- return True
-
- __le__ = issubset
-
- def issuperset(self, other):
- for item in self:
- if item not in other:
- return False
- return True
-
- __ge__ = issuperset
-
- def union(self, other):
- return self | other
-
- def __or__(self, other):
- new = self.copy()
- new |= other
- return new
-
- def intersection(self, other):
- return self & other
-
- def __and__(self, other):
- new = self.copy()
- new &= other
- return new
-
- def difference(self, other):
- return self - other
-
- def __sub__(self, other):
- new = self.copy()
- new -= other
- return new
-
- def symmetric_difference(self, other):
- return self ^ other
-
- def __xor__(self, other):
- new = self.copy()
- new ^= other
- return new
-
- def copy(self):
- return set(self)
-
- def update(self, other):
- for item in other:
- self.add(item)
-
- def __ior__(self, other):
- self.update(other)
- return self
-
- def intersection_update(self, other):
- for item in self:
- if item not in other:
- self.remove(item)
-
- def __iand__(self, other):
- self.intersection_update(other)
- return self
-
- def difference_update(self, other):
- for item in other:
- if item in self:
- self.remove(item)
-
- def __isub__(self, other):
- self.difference_update(other)
- return self
-
- def symmetric_difference_update(self, other):
- for item in other:
- if item in self:
- self.remove(item)
- else:
- self.add(item)
-
- def __ixor__(self, other):
- self.symmetric_difference_update(other)
- return self
-
- def discard(self, item):
- try:
- self.remove(item)
- except KeyError:
- pass
-
- def clear(self):
- for item in list(self):
- self.remove(item)
Modified: lxml/branch/lxml-2.0/src/lxml/html/tests/test_diff.txt
==============================================================================
--- lxml/branch/lxml-2.0/src/lxml/html/tests/test_diff.txt (original)
+++ lxml/branch/lxml-2.0/src/lxml/html/tests/test_diff.txt Thu Feb 14 15:52:28 2008
@@ -204,10 +204,7 @@
Some text and
more text
>>> pfixup('''
...
One table
More stuff
''')
-
-
One table
-
More stuff
-
+
One table
More stuff
Testing split_unbalanced::
Modified: lxml/branch/lxml-2.0/src/lxml/html/tests/test_forms.txt
==============================================================================
--- lxml/branch/lxml-2.0/src/lxml/html/tests/test_forms.txt (original)
+++ lxml/branch/lxml-2.0/src/lxml/html/tests/test_forms.txt Thu Feb 14 15:52:28 2008
@@ -141,3 +141,24 @@
single_checkbox2: 'good'
check_group:
+>>> import lxml.html
+>>> tree = lxml.html.fromstring('''
+...
+...
+...
+... ''')
+>>> tree # doctest: +ELLIPSIS
+
+>>> tree.forms[0] # doctest: +ELLIPSIS
+
+>>> tree.forms[0].fields # doctest: +NOPARSE_MARKUP
+
+>>> tree.forms[0].fields.keys()
+['foo']
+>>> tree.forms[0].fields.items()
+[('foo', 'bar')]
+>>> tree.forms[0].fields.values()
+['bar']
Modified: lxml/branch/lxml-2.0/src/lxml/html/usedoctest.py
==============================================================================
--- lxml/branch/lxml-2.0/src/lxml/html/usedoctest.py (original)
+++ lxml/branch/lxml-2.0/src/lxml/html/usedoctest.py Thu Feb 14 15:52:28 2008
@@ -1,3 +1,13 @@
+"""Doctest module for HTML comparison.
+
+Usage::
+
+ >>> import lxml.html.usedoctest
+ >>> # now do your HTML doctests ...
+
+See `lxml.doctestcompare`.
+"""
+
from lxml import doctestcompare
doctestcompare.temp_install(html=True, del_module=__name__)
Deleted: /lxml/branch/lxml-2.0/src/lxml/htmlbuilder.py
==============================================================================
--- /lxml/branch/lxml-2.0/src/lxml/htmlbuilder.py Thu Feb 14 15:52:28 2008
+++ (empty file)
@@ -1,154 +0,0 @@
-#
-# HTML specialisation of ``builder.py`` by Fredrik Lundh
-#
-# --------------------------------------------------------------------
-# The ElementTree toolkit is
-#
-# Copyright (c) 1999-2004 by Fredrik Lundh
-#
-# By obtaining, using, and/or copying this software and/or its
-# associated documentation, you agree that you have read, understood,
-# and will comply with the following terms and conditions:
-#
-# Permission to use, copy, modify, and distribute this software and
-# its associated documentation for any purpose and without fee is
-# hereby granted, provided that the above copyright notice appears in
-# all copies, and that both that copyright notice and this permission
-# notice appear in supporting documentation, and that the name of
-# Secret Labs AB or the author not be used in advertising or publicity
-# pertaining to distribution of the software without specific, written
-# prior permission.
-#
-# SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD
-# TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT-
-# ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR
-# BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY
-# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
-# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
-# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
-# OF THIS SOFTWARE.
-# --------------------------------------------------------------------
-
-"""
-Usage::
-
- >>> from lxml.htmlbuilder import *
- >>> html = HTML(
- ... HEAD( TITLE("Hello World") ),
- ... BODY( CLASS("main"),
- ... H1("Hello World !")
- ... )
- ... )
-
- >>> import lxml.etree
- >>> print lxml.etree.tostring(html, pretty_print=True)
-
-
- Hello World
-
-
-
Hello World !
-
-
-
-"""
-
-from builder import E
-
-# elements
-A = E.a # anchor
-ABBR = E.abbr # abbreviated form (e.g., WWW, HTTP, etc.)
-ACRONYM = E.acronym #
-ADDRESS = E.address # information on author
-APPLET = E.applet # Java applet (DEPRECATED)
-AREA = E.area # client-side image map area
-B = E.b # bold text style
-BASE = E.base # document base URI
-BASEFONT = E.basefont # base font size (DEPRECATED)
-BDO = E.bdo # I18N BiDi over-ride
-BIG = E.big # large text style
-BLOCKQUOTE = E.blockquote # long quotation
-BODY = E.body # document body
-BR = E.br # forced line break
-BUTTON = E.button # push button
-CAPTION = E.caption # table caption
-CENTER = E.center # shorthand for DIV align=center (DEPRECATED)
-CITE = E.cite # citation
-CODE = E.code # computer code fragment
-COL = E.col # table column
-COLGROUP = E.colgroup # table column group
-DD = E.dd # definition description
-DEL = getattr(E, 'del') # deleted text
-DFN = E.dfn # instance definition
-DIR = E.dir # directory list (DEPRECATED)
-DIV = E.div # generic language/style container
-DL = E.dl # definition list
-DT = E.dt # definition term
-EM = E.em # emphasis
-FIELDSET = E.fieldset # form control group
-FONT = E.font # local change to font (DEPRECATED)
-FORM = E.form # interactive form
-FRAME = E.frame # subwindow
-FRAMESET = E.frameset # window subdivision
-H1 = E.h1 # heading
-H2 = E.h2 # heading
-H3 = E.h3 # heading
-H4 = E.h4 # heading
-H5 = E.h5 # heading
-H6 = E.h6 # heading
-HEAD = E.head # document head
-HR = E.hr # horizontal rule
-HTML = E.html # document root element
-I = E.i # italic text style
-IFRAME = E.iframe # inline subwindow
-IMG = E.img # Embedded image
-INPUT = E.input # form control
-INS = E.ins # inserted text
-ISINDEX = E.isindex # single line prompt (DEPRECATED)
-KBD = E.kbd # text to be entered by the user
-LABEL = E.label # form field label text
-LEGEND = E.legend # fieldset legend
-LI = E.li # list item
-LINK = E.link # a media-independent link
-MAP = E.map # client-side image map
-MENU = E.menu # menu list (DEPRECATED)
-META = E.meta # generic metainformation
-NOFRAMES = E.noframes # alternate content container for non frame-based rendering
-NOSCRIPT = E.noscript # alternate content container for non script-based rendering
-OBJECT = E.object # generic embedded object
-OL = E.ol # ordered list
-OPTGROUP = E.optgroup # option group
-OPTION = E.option # selectable choice
-P = E.p # paragraph
-PARAM = E.param # named property value
-PRE = E.pre # preformatted text
-Q = E.q # short inline quotation
-S = E.s # strike-through text style (DEPRECATED)
-SAMP = E.samp # sample program output, scripts, etc.
-SCRIPT = E.script # script statements
-SELECT = E.select # option selector
-SMALL = E.small # small text style
-SPAN = E.span # generic language/style container
-STRIKE = E.strike # strike-through text (DEPRECATED)
-STRONG = E.strong # strong emphasis
-STYLE = E.style # style info
-SUB = E.sub # subscript
-SUP = E.sup # superscript
-TABLE = E.table #
-TBODY = E.tbody # table body
-TD = E.td # table data cell
-TEXTAREA = E.textarea # multi-line text field
-TFOOT = E.tfoot # table footer
-TH = E.th # table header cell
-THEAD = E.thead # table header
-TITLE = E.title # document title
-TR = E.tr # table row
-TT = E.tt # teletype or monospaced text style
-U = E.u # underlined text style (DEPRECATED)
-UL = E.ul # unordered list
-VAR = E.var # instance of a variable or program argument
-
-# attributes (only reserved words are included here)
-ATTR = dict
-def CLASS(v): return {'class': v}
-def FOR(v): return {'for': v}
Modified: lxml/branch/lxml-2.0/src/lxml/iterparse.pxi
==============================================================================
--- lxml/branch/lxml-2.0/src/lxml/iterparse.pxi (original)
+++ lxml/branch/lxml-2.0/src/lxml/iterparse.pxi Thu Feb 14 15:52:28 2008
@@ -239,9 +239,12 @@
origEnd(ctxt, name)
cdef class iterparse(_BaseParser):
- """Incremental parser. Parses XML into a tree and generates tuples
- (event, element) in a SAX-like fashion. ``event`` is any of 'start',
- 'end', 'start-ns', 'end-ns'.
+ """iterparse(self, source, events=("end",), tag=None, attribute_defaults=False, dtd_validation=False, load_dtd=False, no_network=True, remove_blank_text=False, remove_comments=False, remove_pis=False, encoding=None, html=False, schema=None)
+ Incremental parser.
+
+ Parses XML into a tree and generates tuples (event, element) in a
+ SAX-like fashion. ``event`` is any of 'start', 'end', 'start-ns',
+ 'end-ns'.
For 'start' and 'end', ``element`` is the Element that the parser just
found opening or closing. For 'start-ns', it is a tuple (prefix, URI) of
@@ -262,17 +265,17 @@
attribute default values are requested.
Available boolean keyword arguments:
- * attribute_defaults - read default attributes from DTD
- * dtd_validation - validate (if DTD is available)
- * load_dtd - use DTD for parsing
- * no_network - prevent network access for related files
- * remove_blank_text - discard blank text nodes
- * remove_comments - discard comments
- * remove_pis - discard processing instructions
+ - attribute_defaults - read default attributes from DTD
+ - dtd_validation - validate (if DTD is available)
+ - load_dtd - use DTD for parsing
+ - no_network - prevent network access for related files
+ - remove_blank_text - discard blank text nodes
+ - remove_comments - discard comments
+ - remove_pis - discard processing instructions
Other keyword arguments:
- * encoding - override the document encoding
- * schema - an XMLSchema to validate against
+ - encoding - override the document encoding
+ - schema - an XMLSchema to validate against
"""
cdef object _source
cdef readonly object root
@@ -397,8 +400,10 @@
cdef class iterwalk:
- """A tree walker that generates events from an existing tree as if it was
- parsing XML data with ``iterparse()``.
+ """iterwalk(self, element_or_tree, events=("end",), tag=None)
+
+ A tree walker that generates events from an existing tree as if it
+ was parsing XML data with ``iterparse()``.
"""
cdef object _node_stack
cdef object _pop_node
Modified: lxml/branch/lxml-2.0/src/lxml/lxml.etree.pyx
==============================================================================
--- lxml/branch/lxml-2.0/src/lxml/lxml.etree.pyx (original)
+++ lxml/branch/lxml-2.0/src/lxml/lxml.etree.pyx Thu Feb 14 15:52:28 2008
@@ -1,3 +1,9 @@
+"""The ``lxml.etree`` module implements the extended ElementTree API
+for XML.
+"""
+
+__docformat__ = "restructuredtext en"
+
cimport tree, python, config
from tree cimport xmlDoc, xmlNode, xmlAttr, xmlNs, _isElement, _getNs
from python cimport callable, _cstr, _isString
@@ -214,7 +220,9 @@
cdef class QName:
- """QName wrapper.
+ """QName(text_or_uri, tag=None)
+
+ QName wrapper.
Pass a tag name by itself or a namespace URI and a tag name to
create a qualified name. The ``text`` property holds the
@@ -510,7 +518,9 @@
cdef public class _Element [ type LxmlElementType, object LxmlElement ]:
- """Element class. References a document object and a libxml node.
+ """Element class.
+
+ References a document object and a libxml node.
By pointing to a Document instance, a reference is kept to
_Document as long as there is some pointer to a node in it.
@@ -522,7 +532,9 @@
cdef object _attrib
def _init(self):
- """Called after object initialisation. Custom subclasses may override
+ """_init(self)
+
+ Called after object initialisation. Custom subclasses may override
this if they recursively call _init() in the superclasses.
"""
@@ -537,7 +549,9 @@
# MANIPULATORS
def __setitem__(self, x, value):
- """Replaces the given subelement index or slice.
+ """__setitem__(self, x, value)
+
+ Replaces the given subelement index or slice.
"""
cdef xmlNode* c_node
cdef xmlNode* c_next
@@ -571,7 +585,9 @@
moveNodeToDocument(self._doc, c_node)
def __delitem__(self, x):
- """Deletes the given subelement or a slice.
+ """__delitem__(self, x)
+
+ Deletes the given subelement or a slice.
"""
cdef xmlNode* c_node
cdef xmlNode* c_next
@@ -599,9 +615,11 @@
_removeNode(self._doc, c_node)
def __deepcopy__(self, memo):
+ "__deepcopy__(self, memo)"
return self.__copy__()
def __copy__(self):
+ "__copy__(self)"
cdef xmlDoc* c_doc
cdef xmlNode* c_node
cdef _Document new_doc
@@ -619,17 +637,23 @@
return _elementFactory(new_doc, c_node)
def set(self, key, value):
- """Sets an element attribute.
+ """set(self, key, value)
+
+ Sets an element attribute.
"""
_setAttributeValue(self, key, value)
def append(self, _Element element not None):
- """Adds a subelement to the end of this element.
+ """append(self, element)
+
+ Adds a subelement to the end of this element.
"""
_appendChild(self, element)
def addnext(self, _Element element):
- """Adds the element as a following sibling directly after this
+ """addnext(self, element)
+
+ Adds the element as a following sibling directly after this
element.
This is normally used to set a processing instruction or comment after
@@ -644,7 +668,9 @@
_appendSibling(self, element)
def addprevious(self, _Element element):
- """Adds the element as a preceding sibling directly before this
+ """addprevious(self, element)
+
+ Adds the element as a preceding sibling directly before this
element.
This is normally used to set a processing instruction or comment
@@ -659,13 +685,17 @@
_prependSibling(self, element)
def extend(self, elements):
- """Extends the current children by the elements in the iterable.
+ """extend(self, elements)
+
+ Extends the current children by the elements in the iterable.
"""
for element in elements:
_appendChild(self, element)
def clear(self):
- """Resets an element. This function removes all subelements, clears
+ """clear(self)
+
+ Resets an element. This function removes all subelements, clears
all attributes and sets the text and tail properties to None.
"""
cdef xmlAttr* c_attr
@@ -693,7 +723,9 @@
c_node = c_node_next
def insert(self, index, _Element element not None):
- """Inserts a subelement at the given position in this element
+ """insert(self, index, element)
+
+ Inserts a subelement at the given position in this element
"""
cdef xmlNode* c_node
cdef xmlNode* c_next
@@ -707,7 +739,9 @@
moveNodeToDocument(self._doc, element._c_node)
def remove(self, _Element element not None):
- """Removes a matching subelement. Unlike the find methods, this
+ """remove(self, element)
+
+ Removes a matching subelement. Unlike the find methods, this
method compares elements based on identity, not on tag value
or contents.
"""
@@ -724,7 +758,9 @@
def replace(self, _Element old_element not None,
_Element new_element not None):
- """Replaces a subelement with the element passed as second argument.
+ """replace(self, old_element, new_element)
+
+ Replaces a subelement with the element passed as second argument.
"""
cdef xmlNode* c_old_node
cdef xmlNode* c_old_next
@@ -862,6 +898,7 @@
# ACCESSORS
def __repr__(self):
+ "__repr__(self)"
return "" % (self.tag, id(self))
def __getitem__(self, x):
@@ -901,11 +938,14 @@
return _elementFactory(self._doc, c_node)
def __len__(self):
- """Returns the number of subelements.
+ """__len__(self)
+
+ Returns the number of subelements.
"""
return _countElements(self._c_node.children)
def __nonzero__(self):
+ "__nonzero__(self)"
import warnings
warnings.warn(
"The behavior of this method will change in future versions. "
@@ -916,6 +956,7 @@
return _hasChild(self._c_node)
def __contains__(self, element):
+ "__contains__(self, element)"
cdef xmlNode* c_node
if not isinstance(element, _Element):
return 0
@@ -923,13 +964,17 @@
return c_node is not NULL and c_node.parent is self._c_node
def __iter__(self):
+ "__iter__(self)"
return ElementChildIterator(self)
def __reversed__(self):
+ "__reversed__(self)"
return ElementChildIterator(self, reversed=True)
def index(self, _Element child not None, start=None, stop=None):
- """Find the position of the child within the parent.
+ """index(self, child, start=None, stop=None)
+
+ Find the position of the child within the parent.
This method is not part of the original ElementTree API.
"""
@@ -1012,40 +1057,52 @@
raise ValueError("list.index(x): x not in list")
def get(self, key, default=None):
- """Gets an element attribute.
+ """get(self, key, default=None)
+
+ Gets an element attribute.
"""
return _getAttributeValue(self, key, default)
def keys(self):
- """Gets a list of attribute names. The names are returned in an
+ """keys(self)
+
+ Gets a list of attribute names. The names are returned in an
arbitrary order (just like for an ordinary Python dictionary).
"""
return _collectAttributes(self._c_node, 1)
def values(self):
- """Gets element attribute values as a sequence of strings. The
+ """values(self)
+
+ Gets element attribute values as a sequence of strings. The
attributes are returned in an arbitrary order.
"""
return _collectAttributes(self._c_node, 2)
def items(self):
- """Gets element attributes, as a sequence. The attributes are returned in
+ """items(self)
+
+ Gets element attributes, as a sequence. The attributes are returned in
an arbitrary order.
"""
return _collectAttributes(self._c_node, 3)
def getchildren(self):
- """Returns all direct children. The elements are returned in document
+ """getchildren(self)
+
+ Returns all direct children. The elements are returned in document
order.
- @deprecated: Note that this method has been deprecated as of
- ElementTree 1.3 and lxml 2.0. New code should use
- ``list(element)`` or simply iterate over elements.
+ :deprecated: Note that this method has been deprecated as of
+ ElementTree 1.3 and lxml 2.0. New code should use
+ ``list(element)`` or simply iterate over elements.
"""
return _collectChildren(self)
def getparent(self):
- """Returns the parent of this element or None for the root element.
+ """getparent(self)
+
+ Returns the parent of this element or None for the root element.
"""
cdef xmlNode* c_node
c_node = _parentElement(self._c_node)
@@ -1055,7 +1112,9 @@
return _elementFactory(self._doc, c_node)
def getnext(self):
- """Returns the following sibling of this element or None.
+ """getnext(self)
+
+ Returns the following sibling of this element or None.
"""
cdef xmlNode* c_node
c_node = _nextElement(self._c_node)
@@ -1064,7 +1123,9 @@
return None
def getprevious(self):
- """Returns the preceding sibling of this element or None.
+ """getprevious(self)
+
+ Returns the preceding sibling of this element or None.
"""
cdef xmlNode* c_node
c_node = _previousElement(self._c_node)
@@ -1073,7 +1134,9 @@
return None
def itersiblings(self, tag=None, *, preceding=False):
- """Iterate over the following or preceding siblings of this element.
+ """itersiblings(self, tag=None, preceding=False)
+
+ Iterate over the following or preceding siblings of this element.
The direction is determined by the 'preceding' keyword which defaults
to False, i.e. forward iteration over the following siblings. The
@@ -1083,7 +1146,9 @@
return SiblingsIterator(self, tag, preceding=preceding)
def iterancestors(self, tag=None):
- """Iterate over the ancestors of this element (from parent to parent).
+ """iterancestors(self, tag=None)
+
+ Iterate over the ancestors of this element (from parent to parent).
The generated elements can be restricted to a specific tag name with
the 'tag' keyword.
@@ -1091,7 +1156,9 @@
return AncestorsIterator(self, tag)
def iterdescendants(self, tag=None):
- """Iterate over the descendants of this element in document order.
+ """iterdescendants(self, tag=None)
+
+ Iterate over the descendants of this element in document order.
As opposed to ``el.iter()``, this iterator does not yield the element
itself. The generated elements can be restricted to a specific tag
@@ -1100,7 +1167,9 @@
return ElementDepthFirstIterator(self, tag, inclusive=False)
def iterchildren(self, tag=None, *, reversed=False):
- """Iterate over the children of this element.
+ """iterchildren(self, tag=None, reversed=False)
+
+ Iterate over the children of this element.
As opposed to using normal iteration on this element, the generated
elements can be restricted to a specific tag name with the 'tag'
@@ -1109,7 +1178,9 @@
return ElementChildIterator(self, tag, reversed=reversed)
def getroottree(self):
- """Return an ElementTree for the root node of the document that
+ """getroottree(self)
+
+ Return an ElementTree for the root node of the document that
contains this element.
This is the same as following element.getparent() up the tree until it
@@ -1118,7 +1189,9 @@
return _elementTreeFactory(self._doc, None)
def getiterator(self, tag=None):
- """Returns a sequence or iterator of all elements in the subtree in
+ """getiterator(self, tag=None)
+
+ Returns a sequence or iterator of all elements in the subtree in
document order (depth first pre-order), starting with this
element.
@@ -1128,18 +1201,20 @@
You can also pass the Element, Comment, ProcessingInstruction and
Entity factory functions to look only for the specific element type.
- @deprecated: Note that this method is deprecated as of
- ElementTree 1.3 and lxml 2.0. It returns an iterator in lxml,
- which diverges from the original ElementTree behaviour. If
- you want an efficient iterator, use the ``element.iter()``
- method instead. You should only use this method in new code
- if you require backwards compatibility with older versions of
- lxml or ElementTree.
+ :deprecated: Note that this method is deprecated as of
+ ElementTree 1.3 and lxml 2.0. It returns an iterator in
+ lxml, which diverges from the original ElementTree
+ behaviour. If you want an efficient iterator, use the
+ ``element.iter()`` method instead. You should only use this
+ method in new code if you require backwards compatibility
+ with older versions of lxml or ElementTree.
"""
return ElementDepthFirstIterator(self, tag)
def iter(self, tag=None):
- """Iterate over all elements in the subtree in document order (depth
+ """iter(self, tag=None)
+
+ Iterate over all elements in the subtree in document order (depth
first pre-order), starting with this element.
Can be restricted to find only elements with a specific tag
@@ -1151,7 +1226,9 @@
return ElementDepthFirstIterator(self, tag)
def itertext(self, tag=None, *, with_tail=True):
- """Iterates over the text content of a subtree.
+ """itertext(self, tag=None, with_tail=True)
+
+ Iterates over the text content of a subtree.
You can pass the ``tag`` keyword argument to restrict text content to
a specific tag name.
@@ -1162,41 +1239,53 @@
return ElementTextIterator(self, tag, with_tail=with_tail)
def makeelement(self, _tag, attrib=None, nsmap=None, **_extra):
- """Creates a new element associated with the same document.
+ """makeelement(self, _tag, attrib=None, nsmap=None, **_extra)
+
+ Creates a new element associated with the same document.
"""
return _makeElement(_tag, NULL, self._doc, None, None, None,
attrib, nsmap, _extra)
def find(self, path):
- """Finds the first matching subelement, by tag name or path.
+ """find(self, path)
+
+ Finds the first matching subelement, by tag name or path.
"""
if isinstance(path, QName):
path = (path).text
return _elementpath.find(self, path)
def findtext(self, path, default=None):
- """Finds text for the first matching subelement, by tag name or path.
+ """findtext(self, path, default=None)
+
+ Finds text for the first matching subelement, by tag name or path.
"""
if isinstance(path, QName):
path = (path).text
return _elementpath.findtext(self, path, default)
def findall(self, path):
- """Finds all matching subelements, by tag name or path.
+ """findall(self, path)
+
+ Finds all matching subelements, by tag name or path.
"""
if isinstance(path, QName):
path = (path).text
return _elementpath.findall(self, path)
def iterfind(self, path):
- """Iterates over all matching subelements, by tag name or path.
+ """iterfind(self, path)
+
+ Iterates over all matching subelements, by tag name or path.
"""
if isinstance(path, QName):
path = (path).text
return _elementpath.iterfind(self, path)
def xpath(self, _path, *, namespaces=None, extensions=None, **_variables):
- """Evaluate an xpath expression using the element as context node.
+ """xpath(self, _path, namespaces=None, extensions=None, **_variables)
+
+ Evaluate an xpath expression using the element as context node.
"""
evaluator = XPathElementEvaluator(self, namespaces=namespaces,
extensions=extensions)
@@ -1260,18 +1349,19 @@
raise TypeError("this element does not have children or attributes")
def set(self, key, value):
+ "set(self, key, value)"
self._raiseImmutable()
def append(self, value):
+ "append(self, value)"
self._raiseImmutable()
def insert(self, index, value):
+ "insert(self, index, value)"
self._raiseImmutable()
def __setitem__(self, index, value):
- self._raiseImmutable()
-
- def __setslice__(self, start, end, value):
+ "__setitem__(self, index, value)"
self._raiseImmutable()
property attrib:
@@ -1297,24 +1387,30 @@
# ACCESSORS
def __getitem__(self, x):
+ "__getitem__(self, x)"
if python.PySlice_Check(x):
return []
else:
raise IndexError("list index out of range")
def __len__(self):
+ "__len__(self)"
return 0
def get(self, key, default=None):
+ "get(self, key, default=None)"
return None
def keys(self):
+ "keys(self)"
return []
def items(self):
+ "items(self)"
return []
def values(self):
+ "values(self)"
return []
cdef class _Comment(__ContentOnlyElement):
@@ -1393,7 +1489,9 @@
"ElementTree not initialized, missing root"
def parse(self, source, _BaseParser parser=None):
- """Updates self with the content of source and returns its root
+ """parse(self, source, parser=None)
+
+ Updates self with the content of source and returns its root
"""
cdef _Document doc
doc = _parseDocument(source, parser)
@@ -1405,7 +1503,9 @@
return self._context_node
def _setroot(self, _Element root not None):
- """Relocate the ElementTree to a new root node.
+ """_setroot(self, root)
+
+ Relocate the ElementTree to a new root node.
"""
if root._c_node.type != tree.XML_ELEMENT_NODE:
raise TypeError("Only elements can be the root of an ElementTree")
@@ -1413,7 +1513,9 @@
self._doc = None
def getroot(self):
- """Gets the root element for this tree.
+ """getroot(self)
+
+ Gets the root element for this tree.
"""
return self._context_node
@@ -1448,7 +1550,10 @@
def write(self, file, *, encoding=None, method="xml",
pretty_print=False, xml_declaration=None, with_tail=True):
- """Write the tree to a file or file-like object.
+ """write(self, file, encoding=None, method="xml",
+ pretty_print=False, xml_declaration=None, with_tail=True)
+
+ Write the tree to a file or file-like object.
Defaults to ASCII encoding and writing a declaration as needed.
@@ -1473,7 +1578,9 @@
write_declaration, 1, pretty_print, with_tail)
def getpath(self, _Element element not None):
- """Returns a structural, absolute XPath expression to find that element.
+ """getpath(self, element)
+
+ Returns a structural, absolute XPath expression to find that element.
"""
cdef _Document doc
cdef xmlDoc* c_doc
@@ -1491,7 +1598,9 @@
return path
def getiterator(self, tag=None):
- """Returns a sequence or iterator of all elements in document order
+ """getiterator(self, tag=None)
+
+ Returns a sequence or iterator of all elements in document order
(depth first pre-order), starting with the root element.
Can be restricted to find only elements with a specific tag
@@ -1501,13 +1610,13 @@
You can also pass the Element, Comment, ProcessingInstruction and
Entity factory functions to look only for the specific element type.
- @deprecated: Note that this method is deprecated as of
- ElementTree 1.3 and lxml 2.0. It returns an iterator in lxml,
- which diverges from the original ElementTree behaviour. If
- you want an efficient iterator, use the ``tree.iter()`` method
- instead. You should only use this method in new code if you
- require backwards compatibility with older versions of lxml or
- ElementTree.
+ :deprecated: Note that this method is deprecated as of
+ ElementTree 1.3 and lxml 2.0. It returns an iterator in
+ lxml, which diverges from the original ElementTree
+ behaviour. If you want an efficient iterator, use the
+ ``tree.iter()`` method instead. You should only use this
+ method in new code if you require backwards compatibility
+ with older versions of lxml or ElementTree.
"""
root = self.getroot()
if root is None:
@@ -1515,7 +1624,9 @@
return root.getiterator(tag)
def iter(self, tag=None):
- """Creates an iterator for the root element. The iterator loops over
+ """iter(self, tag=None)
+
+ Creates an iterator for the root element. The iterator loops over
all elements in this tree, in document order.
"""
root = self.getroot()
@@ -1524,7 +1635,9 @@
return root.iter(tag)
def find(self, path):
- """Finds the first toplevel element with given tag. Same as
+ """find(self, path)
+
+ Finds the first toplevel element with given tag. Same as
``tree.getroot().find(path)``.
"""
self._assertHasRoot()
@@ -1534,7 +1647,9 @@
return root.find(path)
def findtext(self, path, default=None):
- """Finds the text for the first element matching the ElementPath
+ """findtext(self, path, default=None)
+
+ Finds the text for the first element matching the ElementPath
expression. Same as getroot().findtext(path)
"""
self._assertHasRoot()
@@ -1544,7 +1659,9 @@
return root.findtext(path, default)
def findall(self, path):
- """Finds all elements matching the ElementPath expression. Same as
+ """findall(self, path)
+
+ Finds all elements matching the ElementPath expression. Same as
getroot().findall(path).
"""
self._assertHasRoot()
@@ -1554,7 +1671,9 @@
return root.findall(path)
def iterfind(self, path):
- """Iterates over all elements matching the ElementPath expression.
+ """iterfind(self, path)
+
+ Iterates over all elements matching the ElementPath expression.
Same as getroot().iterfind(path).
"""
self._assertHasRoot()
@@ -1564,7 +1683,9 @@
return root.iterfind(path)
def xpath(self, _path, *, namespaces=None, extensions=None, **_variables):
- """XPath evaluate in context of document.
+ """xpath(self, _path, namespaces=None, extensions=None, **_variables)
+
+ XPath evaluate in context of document.
``namespaces`` is an optional dictionary with prefix to namespace URI
mappings, used by XPath. ``extensions`` defines additional extension
@@ -1585,7 +1706,9 @@
return evaluator.evaluate(_path, **_variables)
def xslt(self, _xslt, extensions=None, access_control=None, **_kw):
- """Transform this document using other document.
+ """xslt(self, _xslt, extensions=None, access_control=None, **_kw)
+
+ Transform this document using other document.
xslt is a tree that should be XSLT
keyword parameters are XSLT transformation parameters.
@@ -1602,7 +1725,9 @@
return style(self, **_kw)
def relaxng(self, relaxng):
- """Validate this document using other document.
+ """relaxng(self, relaxng)
+
+ Validate this document using other document.
The relaxng argument is a tree that should contain a Relax NG schema.
@@ -1618,7 +1743,9 @@
return schema.validate(self)
def xmlschema(self, xmlschema):
- """Validate this document using other document.
+ """xmlschema(self, xmlschema)
+
+ Validate this document using other document.
The xmlschema argument is a tree that should contain an XML Schema.
@@ -1634,7 +1761,9 @@
return schema.validate(self)
def xinclude(self):
- """Process the XInclude nodes in this document and include the
+ """xinclude(self)
+
+ Process the XInclude nodes in this document and include the
referenced XML fragments.
There is support for loading files through the file system, HTTP and
@@ -1648,7 +1777,9 @@
XInclude()(self._context_node)
def write_c14n(self, file):
- """C14N write of document. Always writes UTF-8.
+ """write_c14n(self, file)
+
+ C14N write of document. Always writes UTF-8.
"""
self._assertHasRoot()
_tofilelikeC14N(file, self._context_node)
@@ -1669,9 +1800,7 @@
cdef class _Attrib:
- """A proxy for the ``Element.attrib`` property.
-
- Behaves as a normal Python dict.
+ """A dict-like proxy for the ``Element.attrib`` property.
"""
cdef _Element _element
def __init__(self, _Element element not None):
@@ -1898,7 +2027,9 @@
return current_node
cdef class ElementChildIterator(_ElementIterator):
- "Iterates over the children of an element."
+ """ElementChildIterator(self, node, tag=None, reversed=False)
+ Iterates over the children of an element.
+ """
def __init__(self, _Element node not None, tag=None, *, reversed=False):
cdef xmlNode* c_node
self._initTagMatch(tag)
@@ -1919,7 +2050,8 @@
self._node = _elementFactory(node._doc, c_node)
cdef class SiblingsIterator(_ElementIterator):
- """Iterates over the siblings of an element.
+ """SiblingsIterator(self, node, tag=None, preceding=False)
+ Iterates over the siblings of an element.
You can pass the boolean keyword ``preceding`` to specify the direction.
"""
@@ -1932,18 +2064,24 @@
self._storeNext(node)
cdef class AncestorsIterator(_ElementIterator):
- "Iterates over the ancestors of an element (from parent to parent)."
+ """AncestorsIterator(self, node, tag=None)
+ Iterates over the ancestors of an element (from parent to parent).
+ """
def __init__(self, _Element node not None, tag=None):
self._initTagMatch(tag)
self._next_element = _parentElement
self._storeNext(node)
cdef class ElementDepthFirstIterator(_ElementTagMatcher):
- """Iterates over an element and its sub-elements in document order (depth
- first pre-order). Note that this also includes comments, entities and
- processing instructions. To filter them out, check if the ``tag``
- property of the returned element is a string (i.e. not None and not a
- factory function), or pass the ``Element`` factory for the ``tag`` keyword.
+ """ElementDepthFirstIterator(self, node, tag=None, inclusive=True)
+ Iterates over an element and its sub-elements in document order (depth
+ first pre-order).
+
+ Note that this also includes comments, entities and processing
+ instructions. To filter them out, check if the ``tag`` property
+ of the returned element is a string (i.e. not None and not a
+ factory function), or pass the ``Element`` factory for the ``tag``
+ keyword.
If the optional ``tag`` argument is not None, the iterator returns only
the elements that match the respective name and namespace.
@@ -2006,7 +2144,8 @@
return NULL
cdef class ElementTextIterator:
- """Iterates over the text content of a subtree.
+ """ElementTextIterator(self, element, tag=None, with_tail=True)
+ Iterates over the text content of a subtree.
You can pass the ``tag`` keyword argument to restrict text content to a
specific tag name.
@@ -2060,7 +2199,9 @@
# module-level API for ElementTree
def Element(_tag, attrib=None, nsmap=None, **_extra):
- """Element factory. This function returns an object implementing the
+ """Element(_tag, attrib=None, nsmap=None, **_extra)
+
+ Element factory. This function returns an object implementing the
Element interface.
"""
### also look at _Element.makeelement() and _BaseParser.makeelement() ###
@@ -2068,7 +2209,9 @@
attrib, nsmap, _extra)
def Comment(text=None):
- """Comment element factory. This factory function creates a special element that will
+ """Comment(text=None)
+
+ Comment element factory. This factory function creates a special element that will
be serialized as an XML comment.
"""
cdef _Document doc
@@ -2085,7 +2228,9 @@
return _elementFactory(doc, c_node)
def ProcessingInstruction(target, text=None):
- """ProcessingInstruction element factory. This factory function creates a
+ """ProcessingInstruction(target, text=None)
+
+ ProcessingInstruction element factory. This factory function creates a
special element that will be serialized as an XML processing instruction.
"""
cdef _Document doc
@@ -2105,7 +2250,9 @@
PI = ProcessingInstruction
def Entity(name):
- """Entity factory. This factory function creates a special element
+ """Entity(name)
+
+ Entity factory. This factory function creates a special element
that will be serialized as an XML entity reference or character
reference. Note, however, that entities will not be automatically
declared in the document. A document that uses entity references
@@ -2130,13 +2277,17 @@
def SubElement(_Element _parent not None, _tag,
attrib=None, nsmap=None, **_extra):
- """Subelement factory. This function creates an element instance, and
+ """SubElement(_parent, _tag, attrib=None, nsmap=None, **_extra)
+
+ Subelement factory. This function creates an element instance, and
appends it to an existing element.
"""
return _makeSubElement(_parent, _tag, None, None, attrib, nsmap, _extra)
def ElementTree(_Element element=None, *, file=None, _BaseParser parser=None):
- """ElementTree wrapper class.
+ """ElementTree(element=None, file=None, parser=None)
+
+ ElementTree wrapper class.
"""
cdef xmlNode* c_next
cdef xmlNode* c_node
@@ -2159,7 +2310,9 @@
return _elementTreeFactory(doc, element)
def HTML(text, _BaseParser parser=None, *, base_url=None):
- """Parses an HTML document from a string constant. This function can be used
+ """HTML(text, parser=None, base_url=None)
+
+ Parses an HTML document from a string constant. This function can be used
to embed "HTML literals" in Python code.
To override the parser with a different ``HTMLParser`` you can pass it to
@@ -2181,7 +2334,9 @@
return result_container.result
def XML(text, _BaseParser parser=None, *, base_url=None):
- """Parses an XML document from a string constant. This function can be used
+ """XML(text, parser=None, base_url=None)
+
+ Parses an XML document from a string constant. This function can be used
to embed "XML literals" in Python code, like in
>>> root = etree.XML("<root><test/></root>")
@@ -2205,7 +2360,9 @@
return result_container.result
def fromstring(text, _BaseParser parser=None, *, base_url=None):
- """Parses an XML document from a string.
+ """fromstring(text, parser=None, base_url=None)
+
+ Parses an XML document from a string.
To override the default parser with a different parser you can pass it to
the ``parser`` keyword argument.
@@ -2222,7 +2379,9 @@
return result_container.result
def fromstringlist(strings, _BaseParser parser=None):
- """Parses an XML document from a sequence of strings.
+ """fromstringlist(strings, parser=None)
+
+ Parses an XML document from a sequence of strings.
To override the default parser with a different parser you can pass it to
the ``parser`` keyword argument.
@@ -2236,19 +2395,26 @@
return parser.close()
def iselement(element):
- """Checks if an object appears to be a valid element object.
+ """iselement(element)
+
+ Checks if an object appears to be a valid element object.
"""
return isinstance(element, _Element)
def dump(_Element elem not None, *, pretty_print=True, with_tail=True):
- """Writes an element tree or element structure to sys.stdout. This function
+ """dump(elem, pretty_print=True, with_tail=True)
+
+ Writes an element tree or element structure to sys.stdout. This function
should be used for debugging only.
"""
_dumpToFile(sys.stdout, elem._c_node, pretty_print, with_tail)
def tostring(element_or_tree, *, encoding=None, method="xml",
xml_declaration=None, pretty_print=False, with_tail=True):
- """Serialize an element to an encoded string representation of its XML
+ """tostring(element_or_tree, encoding=None, method="xml",
+ xml_declaration=None, pretty_print=False, with_tail=True)
+
+ Serialize an element to an encoded string representation of its XML
tree.
Defaults to ASCII encoding without XML declaration. This behaviour can be
@@ -2295,7 +2461,9 @@
type(element_or_tree))
def tostringlist(element_or_tree, *args, **kwargs):
- """Serialize an element to an encoded string representation of its XML
+ """tostringlist(element_or_tree, *args, **kwargs)
+
+ Serialize an element to an encoded string representation of its XML
tree, stored in a list of partial strings.
This is purely for ElementTree 1.3 compatibility. The result is a
@@ -2305,7 +2473,10 @@
def tounicode(element_or_tree, *, method="xml", pretty_print=False,
with_tail=True):
- """Serialize an element to the Python unicode representation of its XML
+ """tounicode(element_or_tree, method="xml", pretty_print=False,
+ with_tail=True)
+
+ Serialize an element to the Python unicode representation of its XML
tree.
Note that the result does not carry an XML encoding declaration and is
@@ -2321,7 +2492,7 @@
by passing the boolean ``with_tail`` option. This has no impact
on the tail text of children, which will always be serialised.
- @deprecated: use ``tostring(el, encoding=unicode)`` instead.
+ :deprecated: use ``tostring(el, encoding=unicode)`` instead.
"""
if isinstance(element_or_tree, _Element):
return _tounicode(<_Element>element_or_tree, method, 0, pretty_print,
@@ -2334,7 +2505,9 @@
type(element_or_tree))
def parse(source, _BaseParser parser=None):
- """Return an ElementTree object loaded with source elements. If no parser
+ """parse(source, parser=None)
+
+ Return an ElementTree object loaded with source elements. If no parser
is provided as second argument, the default parser is used.
"""
cdef _Document doc
@@ -2369,8 +2542,10 @@
# Validation
class DocumentInvalid(LxmlError):
- """Validation error. Raised by all document validators when their
- ``assertValid(tree)`` method fails.
+ """Validation error.
+
+ Raised by all document validators when their ``assertValid(tree)``
+ method fails.
"""
pass
@@ -2378,28 +2553,39 @@
"Base class for XML validators."
cdef _ErrorLog _error_log
def __init__(self):
+ "__init__(self)"
self._error_log = _ErrorLog()
def validate(self, etree):
- """Validate the document using this schema.
+ """validate(self, etree)
+
+ Validate the document using this schema.
- Returns true if document is valid, false if not."""
+ Returns true if document is valid, false if not.
+ """
return self(etree)
def assertValid(self, etree):
- "Raises DocumentInvalid if the document does not comply with the schema."
+ """assertValid(self, etree)
+
+ Raises `DocumentInvalid` if the document does not comply with the schema.
+ """
if not self(etree):
raise DocumentInvalid(self._error_log._buildExceptionMessage(
"Document does not comply with schema"),
self._error_log)
def assert_(self, etree):
- "Raises AssertionError if the document does not comply with the schema."
+ """assert_(self, etree)
+
+ Raises `AssertionError` if the document does not comply with the schema.
+ """
if not self(etree):
raise AssertionError(self._error_log._buildExceptionMessage(
"Document does not comply with schema"))
property error_log:
+ "The log of validation errors and warnings."
def __get__(self):
return self._error_log.copy()
Modified: lxml/branch/lxml-2.0/src/lxml/lxml.objectify.pyx
==============================================================================
--- lxml/branch/lxml-2.0/src/lxml/lxml.objectify.pyx (original)
+++ lxml/branch/lxml-2.0/src/lxml/lxml.objectify.pyx Thu Feb 14 15:52:28 2008
@@ -1,3 +1,7 @@
+"""The ``lxml.objectify`` module implements a Python object API for
+XML. It is based on `lxml.etree`.
+"""
+
from etreepublic cimport _Document, _Element, ElementBase
from etreepublic cimport _ElementIterator, ElementClassLookup
from etreepublic cimport elementFactory, import_lxml__etree, textOf
@@ -153,7 +157,9 @@
return _countSiblings(self._c_node)
def countchildren(self):
- """Return the number of children of this element, regardless of their
+ """countchildren(self)
+
+ Return the number of children of this element, regardless of their
name.
"""
# copied from etree
@@ -168,7 +174,9 @@
return c
def getchildren(self):
- """Returns a sequence of all direct children. The elements are
+ """getchildren(self)
+
+ Returns a sequence of all direct children. The elements are
returned in document order.
"""
cdef tree.xmlNode* c_node
@@ -217,7 +225,9 @@
self.remove(child)
def addattr(self, tag, value):
- """Add a child value to the element.
+ """addattr(self, tag, value)
+
+ Add a child value to the element.
As opposed to append(), it sets a data value, not an element.
"""
@@ -242,13 +252,7 @@
if python._isString(key):
return _lookupChildOrRaise(self, key)
elif python.PySlice_Check(key):
- python.PySlice_GetIndicesEx(
- key, _countSiblings(self._c_node),
- &start, &stop, &step, &slicelength)
- if step < 0:
- return list(self)[start:stop:step]
- else:
- return list(islice(self, start, stop, step))
+ return list(self)[key]
# normal item access
c_self_node = self._c_node
c_parent = c_self_node.parent
@@ -269,7 +273,8 @@
def __setitem__(self, key, value):
"""Set the value of a sibling, counting from the first child of the
- parent.
+ parent. Implements key assignment, item assignment and slice
+ assignment.
* If argument is an integer, sets the sibling at that position.
@@ -280,12 +285,7 @@
items to the siblings.
"""
cdef _Element element
- cdef _Element parent
- cdef _Element new_element
- cdef tree.xmlNode* c_self_node
- cdef tree.xmlNode* c_parent
cdef tree.xmlNode* c_node
- cdef Py_ssize_t start, stop, step, slicelength
if python._isString(key):
key = _buildChildTag(self, key)
element = _lookupChild(self, key)
@@ -295,48 +295,21 @@
_replaceElement(element, value)
return
- c_self_node = self._c_node
- c_parent = c_self_node.parent
- if c_parent is NULL:
+ if self._c_node.parent is NULL:
# the 'root[i] = ...' case
raise TypeError("assignment to root element is invalid")
if python.PySlice_Check(key):
# slice assignment
- python.PySlice_GetIndicesEx(
- key, _countSiblings(self._c_node),
- &start, &stop, &step, &slicelength)
- # replace existing items
- new_items = iter(value)
- if step < 0:
- del_items = list(self)[start:stop:step]
- else:
- del_items = list(islice(self, start, stop, step))
- del_items = iter(del_items)
- parent = self.getparent()
- try:
- for el in del_items:
- item = new_items.next()
- _replaceElement(el, item)
- except StopIteration:
- remove = parent.remove
- remove(el)
- for el in del_items:
- remove(el)
- return
- else:
- # append remaining new items
- tag = self.tag
- for item in new_items:
- _appendValue(parent, tag, item)
+ _setSlice(key, self, value)
else:
# normal index assignment
if key < 0:
- c_node = c_parent.last
+ c_node = self._c_node.parent.last
else:
- c_node = c_parent.children
+ c_node = self._c_node.parent.children
c_node = _findFollowingSibling(
- c_node, tree._getNs(c_self_node), c_self_node.name, key)
+ c_node, tree._getNs(self._c_node), self._c_node.name, key)
if c_node is NULL:
raise IndexError(key)
element = elementFactory(self._doc, c_node)
@@ -368,18 +341,21 @@
parent.remove(sibling)
def iterfind(self, path):
+ "iterfind(self, path)"
# Reimplementation of Element.iterfind() to make it work without child
# iteration.
xpath = etree.ETXPath(path)
return iter(xpath(self))
def findall(self, path):
+ "findall(self, path)"
# Reimplementation of Element.findall() to make it work without child
# iteration.
xpath = etree.ETXPath(path)
return xpath(self)
def find(self, path):
+ "find(self, path)"
# Reimplementation of Element.find() to make it work without child
# iteration.
result = self.findall(path)
@@ -391,6 +367,7 @@
return None
def findtext(self, path, default=None):
+ "findtext(self, path, default=None)"
# Reimplementation of Element.findtext() to make it work without child
# iteration.
result = self.find(path)
@@ -400,7 +377,9 @@
return default
def descendantpaths(self, prefix=None):
- """Returns a list of object path expressions for all descendants.
+ """descendantpaths(self, prefix=None)
+
+ Returns a list of object path expressions for all descendants.
"""
if prefix is not None and not python._isString(prefix):
prefix = '.'.join(prefix)
@@ -538,6 +517,81 @@
PYTYPE_ATTRIBUTE_NAME)
cetree.setNodeText(element._c_node, value)
+cdef _setSlice(slice, _Element target, items):
+ cdef _Element parent
+ cdef tree.xmlNode* c_node
+ cdef Py_ssize_t c_step, c_start, pos
+ # collect existing slice
+ if (slice).step is None:
+ c_step = 1
+ else:
+ c_step = (slice).step
+ if c_step == 0:
+ raise ValueError("Invalid slice")
+ del_items = target[slice]
+
+ # collect new values
+ new_items = []
+ tag = target.tag
+ for item in items:
+ if isinstance(item, _Element):
+ # deep copy the new element
+ new_element = cetree.deepcopyNodeToDocument(
+ target._doc, (<_Element>item)._c_node)
+ new_element.tag = tag
+ else:
+ new_element = cetree.makeElement(
+ tag, target._doc, None, None, None, None, None)
+ _setElementValue(new_element, item)
+ python.PyList_Append(new_items, new_element)
+
+ # sanity check - raise what a list would raise
+ if c_step != 1 and \
+ python.PyList_GET_SIZE(del_items) != python.PyList_GET_SIZE(new_items):
+ raise ValueError(
+ "attempt to assign sequence of size %d to extended slice of size %d" % (
+ python.PyList_GET_SIZE(new_items),
+ python.PyList_GET_SIZE(del_items)))
+
+ # replace existing items
+ pos = 0
+ parent = target.getparent()
+ replace = parent.replace
+ while pos < python.PyList_GET_SIZE(new_items) and \
+ pos < python.PyList_GET_SIZE(del_items):
+ replace(del_items[pos], new_items[pos])
+ pos += 1
+ # remove leftover items
+ if pos < python.PyList_GET_SIZE(del_items):
+ remove = parent.remove
+ while pos < python.PyList_GET_SIZE(del_items):
+ remove(del_items[pos])
+ pos += 1
+ # append remaining new items
+ if pos < python.PyList_GET_SIZE(new_items):
+ # the sanity check above guarantees (step == 1)
+ if pos > 0:
+ item = new_items[pos-1]
+ else:
+ if (slice).start > 0:
+ c_node = parent._c_node.children
+ else:
+ c_node = parent._c_node.last
+ c_node = _findFollowingSibling(
+ c_node, tree._getNs(target._c_node), target._c_node.name,
+ (slice).start - 1)
+ if c_node is NULL:
+ while pos < python.PyList_GET_SIZE(new_items):
+ cetree.appendChild(parent, new_items[pos])
+ pos += 1
+ return
+ item = cetree.elementFactory(parent._doc, c_node)
+ while pos < python.PyList_GET_SIZE(new_items):
+ add = item.addnext
+ item = new_items[pos]
+ add(item)
+ pos += 1
+
################################################################################
# Data type support in subclasses
@@ -815,7 +869,8 @@
# Python type registry
cdef class PyType:
- """User defined type.
+ """PyType(self, name, type_check, type_class, stringify=None)
+ User defined type.
Named type that contains a type check function and a type class that
inherits from ObjectifiedDataElement. The type check must take a string
@@ -824,6 +879,7 @@
guessing.
Example::
+
PyType('int', int, MyIntClass).register()
Note that the order in which types are registered matters. The first
@@ -856,7 +912,9 @@
return "PyType(%s, %s)" % (self.name, self._type.__name__)
def register(self, before=None, after=None):
- """Register the type.
+ """register(self, before=None, after=None)
+
+ Register the type.
The additional keyword arguments 'before' and 'after' accept a
sequence of type names that must appear before/after the new type in
@@ -895,6 +953,7 @@
_SCHEMA_TYPE_DICT[xs_type] = self
def unregister(self):
+ "unregister(self)"
if _PYTYPE_DICT.get(self.name) is self:
del _PYTYPE_DICT[self.name]
for xs_type, pytype in _SCHEMA_TYPE_DICT.items():
@@ -951,7 +1010,9 @@
return _typename(obj)
def pytypename(obj):
- """Find the name of the corresponding PyType for a Python object.
+ """pytypename(obj)
+
+ Find the name of the corresponding PyType for a Python object.
"""
return _pytypename(obj)
@@ -997,7 +1058,9 @@
_registerPyTypes()
def getRegisteredTypes():
- """Returns a list of the currently registered PyType objects.
+ """getRegisteredTypes()
+
+ Returns a list of the currently registered PyType objects.
To add a new type, retrieve this list and call unregister() for all
entries. Then add the new type at a suitable position (possibly replacing
@@ -1061,6 +1124,8 @@
cdef _ObjectifyElementMakerCaller NEW_ELEMENT_MAKER "PY_NEW" (object t)
cdef class ElementMaker:
+ """ElementMaker(self, namespace=None, nsmap=None, annotate=True, makeelement=None)
+ """
cdef object _makeelement
cdef object _namespace
cdef object _nsmap
@@ -1099,6 +1164,7 @@
cdef bint _annotate
def __call__(self, *children, **attrib):
+ "__call__(self, *children, **attrib)"
cdef _ObjectifyElementMakerCaller elementMaker
cdef python.PyObject* pytype
cdef _Element element
@@ -1176,14 +1242,18 @@
__RECURSIVE_STR = 0 # default: off
def enableRecursiveStr(on=True):
- """Enable a recursively generated tree representation for str(element),
+ """enableRecursiveStr(on=True)
+
+ Enable a recursively generated tree representation for str(element),
based on objectify.dump(element).
"""
global __RECURSIVE_STR
__RECURSIVE_STR = on
def dump(_Element element not None):
- """Return a recursively generated string representation of an element.
+ """dump(element)
+
+ Return a recursively generated string representation of an element.
"""
return _dump(element, 0)
@@ -1230,6 +1300,7 @@
copy_reg.pickle(ObjectifiedElement, reduceFunction, fromstring)
def pickleReduce(obj):
+ "pickleReduce(obj)"
return (fromstring, (etree.tostring(obj),))
_setupPickle(pickleReduce)
@@ -1239,7 +1310,8 @@
# Element class lookup
cdef class ObjectifyElementClassLookup(ElementClassLookup):
- """Element class lookup method that uses the objectify classes.
+ """ObjectifyElementClassLookup(self, tree_class=None, empty_data_class=None)
+ Element class lookup method that uses the objectify classes.
"""
cdef object empty_data_class
cdef object tree_class
@@ -1325,7 +1397,9 @@
def pyannotate(element_or_tree, *, ignore_old=False, ignore_xsi=False,
empty_pytype=None):
- """Recursively annotates the elements of an XML tree with 'pytype'
+ """pyannotate(element_or_tree, ignore_old=False, ignore_xsi=False, empty_pytype=None)
+
+ Recursively annotates the elements of an XML tree with 'pytype'
attributes.
If the 'ignore_old' keyword argument is True (default: False), current 'pytype'
@@ -1346,7 +1420,9 @@
def xsiannotate(element_or_tree, *, ignore_old=False, ignore_pytype=False,
empty_type=None):
- """Recursively annotates the elements of an XML tree with 'xsi:type'
+ """xsiannotate(element_or_tree, ignore_old=False, ignore_pytype=False, empty_type=None)
+
+ Recursively annotates the elements of an XML tree with 'xsi:type'
attributes.
If the 'ignore_old' keyword argument is True (default: False), current
@@ -1373,7 +1449,9 @@
def annotate(element_or_tree, *, ignore_old=True, ignore_xsi=False,
empty_pytype=None, empty_type=None, annotate_xsi=0,
annotate_pytype=1):
- """Recursively annotates the elements of an XML tree with 'xsi:type'
+ """annotate(element_or_tree, ignore_old=True, ignore_xsi=False, empty_pytype=None, empty_type=None, annotate_xsi=0, annotate_pytype=1)
+
+ Recursively annotates the elements of an XML tree with 'xsi:type'
and/or 'py:pytype' attributes.
If the 'ignore_old' keyword argument is True (the default), current
@@ -1559,7 +1637,9 @@
tree.END_FOR_EACH_ELEMENT_FROM(c_node)
def deannotate(element_or_tree, *, pytype=True, xsi=True):
- """Recursively de-annotate the elements of an XML tree by removing 'pytype'
+ """deannotate(element_or_tree, pytype=True, xsi=True)
+
+ Recursively de-annotate the elements of an XML tree by removing 'pytype'
and/or 'type' attributes.
If the 'pytype' keyword argument is True (the default), 'pytype' attributes
@@ -1604,11 +1684,13 @@
objectify_parser = __DEFAULT_PARSER
def setDefaultParser(new_parser = None):
- "This function is deprecated, use ``set_default_parser()`` instead."
+ ":deprecated: use ``set_default_parser()`` instead."
set_default_parser(new_parser)
def set_default_parser(new_parser = None):
- """Replace the default parser used by objectify's Element() and
+ """set_default_parser(new_parser = None)
+
+ Replace the default parser used by objectify's Element() and
fromstring() functions.
The new parser must be an etree.XMLParser.
@@ -1624,7 +1706,9 @@
raise TypeError("parser must inherit from lxml.etree.XMLParser")
def makeparser(**kw):
- """Create a new XML parser for objectify trees.
+ """makeparser(remove_blank_text=True, **kw)
+
+ Create a new XML parser for objectify trees.
You can pass all keyword arguments that are supported by
``etree.XMLParser()``. Note that this parser defaults to removing
@@ -1647,7 +1731,9 @@
_fromstring = etree.fromstring
def fromstring(xml, parser=None):
- """Objectify specific version of the lxml.etree fromstring() function
+ """fromstring(xml, parser=None)
+
+ Objectify specific version of the lxml.etree fromstring() function
that uses the objectify parser.
You can pass a different parser as second argument.
@@ -1662,7 +1748,9 @@
_parse = etree.parse
def parse(f, parser=None):
- """Parse a file or file-like object with the objectify parser.
+ """parse(f, parser=None)
+
+ Parse a file or file-like object with the objectify parser.
You can pass a different parser as second argument.
"""
@@ -1678,7 +1766,9 @@
E = ElementMaker()
def Element(_tag, attrib=None, nsmap=None, *, _pytype=None, **_attributes):
- """Objectify specific version of the lxml.etree Element() factory that
+ """Element(_tag, attrib=None, nsmap=None, _pytype=None, **_attributes)
+
+ Objectify specific version of the lxml.etree Element() factory that
always creates a structural (tree) element.
NOTE: requires parser based element class lookup activated in lxml.etree!
@@ -1696,7 +1786,9 @@
def DataElement(_value, attrib=None, nsmap=None, *, _pytype=None, _xsi=None,
**_attributes):
- """Create a new element from a Python value and XML attributes taken from
+ """DataElement(_value, attrib=None, nsmap=None, _pytype=None, _xsi=None, **_attributes)
+
+ Create a new element from a Python value and XML attributes taken from
keyword arguments or a dictionary passed as second argument.
Automatically adds a 'pytype' attribute for the Python type of the value,
Modified: lxml/branch/lxml-2.0/src/lxml/lxml.pyclasslookup.pyx
==============================================================================
--- lxml/branch/lxml-2.0/src/lxml/lxml.pyclasslookup.pyx (original)
+++ lxml/branch/lxml-2.0/src/lxml/lxml.pyclasslookup.pyx Thu Feb 14 15:52:28 2008
@@ -1,3 +1,39 @@
+"""
+A whole-tree Element class lookup scheme for `lxml.etree`.
+
+This class lookup scheme allows access to the entire XML tree in
+read-only mode. To use it, let a class inherit from
+`PythonElementClassLookup` and re-implement the ``lookup(self, doc,
+root)`` method:
+
+ >>> from lxml import etree, pyclasslookup
+ >>>
+ >>> class MyElementClass(etree.ElementBase):
+ ... honkey = True
+ ...
+ >>> class MyLookup(pyclasslookup.PythonElementClassLookup):
+ ... def lookup(self, doc, root):
+ ... if root.tag == "sometag":
+ ... return MyElementClass
+ ... else:
+ ... for child in root:
+ ... if child.tag == "someothertag":
+ ... return MyElementClass
+ ... # delegate to default
+ ... return None
+
+Note that the API of the Element objects is not complete. It is
+purely read-only and does not support all features of the normal
+`lxml.etree` API (such as XPath, extended slicing or some iteration
+methods).
+
+Also, you cannot wrap such a read-only Element in an ElementTree, and
+you must take care not to keep a reference to them outside of the
+`lookup()` method.
+
+See http://codespeak.net/lxml/element_classes.html
+"""
+
from etreepublic cimport _Document, _Element, ElementBase
from etreepublic cimport ElementClassLookup, FallbackElementClassLookup
from etreepublic cimport elementFactory, import_lxml__etree
@@ -17,6 +53,7 @@
__version__ = etree.__version__
cdef class _ElementProxy:
+ "The main read-only Element proxy class (for internal use only!)."
cdef tree.xmlNode* _c_node
cdef object _source_proxy
cdef object _dependent_proxies
@@ -128,6 +165,21 @@
c_node = cetree.findChildBackwards(self._c_node, 0)
return c_node != NULL
+ def __iter__(self):
+ return iter(self.getchildren())
+
+ def iterchildren(self, tag=None, *, reversed=False):
+ """iterchildren(self, tag=None, reversed=False)
+
+ Iterate over the children of this element.
+ """
+ children = self.getchildren()
+ if tag is not None:
+ children = [ el for el in children if el.tag == tag ]
+ if reversed:
+ children = children[::-1]
+ return iter(children)
+
def get(self, key, default=None):
"""Gets an element attribute.
"""
@@ -155,7 +207,7 @@
self._assertNode()
return cetree.collectAttributes(self._c_node, 3)
- def getchildren(self):
+ cpdef getchildren(self):
"""Returns all subelements. The elements are returned in document
order.
"""
@@ -201,15 +253,21 @@
return _newProxy(self._source_proxy, c_node)
return None
+
+cdef extern from "etree_defs.h":
+ # macro call to 't->tp_new()' for fast instantiation
+ cdef _ElementProxy NEW_PROXY "PY_NEW" (object t)
+
cdef _ElementProxy _newProxy(_ElementProxy sourceProxy, tree.xmlNode* c_node):
cdef _ElementProxy el
- el = _ElementProxy()
+ el = NEW_PROXY(_ElementProxy)
el._c_node = c_node
if sourceProxy is None:
- sourceProxy = el
- el._dependent_proxies = []
- el._source_proxy = sourceProxy
- python.PyList_Append(sourceProxy._dependent_proxies, el)
+ el._source_proxy = el
+ el._dependent_proxies = [el]
+ else:
+ el._source_proxy = sourceProxy
+ python.PyList_Append(sourceProxy._dependent_proxies, el)
return el
cdef _freeProxies(_ElementProxy sourceProxy):
@@ -238,7 +296,8 @@
cdef class PythonElementClassLookup(FallbackElementClassLookup):
- """Element class lookup based on a subclass method.
+ """PythonElementClassLookup(self, fallback=None)
+ Element class lookup based on a subclass method.
To use it, inherit from this class and override the lookup method to
lookup the element class for a node::
@@ -257,6 +316,10 @@
self._lookup_function = _lookup_class
def lookup(self, doc, element):
+ """lookup(self, doc, element)
+
+ Override this method to implement your own lookup scheme.
+ """
return None
cdef object _lookup_class(state, _Document doc, tree.xmlNode* c_node):
Modified: lxml/branch/lxml-2.0/src/lxml/nsclasses.pxi
==============================================================================
--- lxml/branch/lxml-2.0/src/lxml/nsclasses.pxi (original)
+++ lxml/branch/lxml-2.0/src/lxml/nsclasses.pxi Thu Feb 14 15:52:28 2008
@@ -28,10 +28,14 @@
self._entries = {}
def update(self, class_dict_iterable):
- """Forgivingly update the registry. If registered values do not match
- the required type for this registry, or if their name starts with '_',
- they will be silently discarded. This allows registrations at the
- module or class level using vars(), globals() etc."""
+ """update(self, class_dict_iterable)
+
+ Forgivingly update the registry.
+
+ If registered values do not match the required type for this
+ registry, or if their name starts with '_', they will be
+ silently discarded. This allows registrations at the module or
+ class level using vars(), globals() etc."""
if hasattr(class_dict_iterable, 'items'):
class_dict_iterable = class_dict_iterable.items()
for name, item in class_dict_iterable:
@@ -89,7 +93,9 @@
cdef class ElementNamespaceClassLookup(FallbackElementClassLookup):
- """Element class lookup scheme that searches the Element class in the
+ """ElementNamespaceClassLookup(self, fallback=None)
+
+ Element class lookup scheme that searches the Element class in the
Namespace registry.
"""
cdef object _namespace_registries
@@ -99,8 +105,11 @@
self._lookup_function = _find_nselement_class
def get_namespace(self, ns_uri):
- """Retrieve the namespace object associated with the given URI. Creates a
- new one if it does not yet exist."""
+ """get_namespace(self, ns_uri)
+
+ Retrieve the namespace object associated with the given URI.
+
+ Creates a new one if it does not yet exist."""
if ns_uri:
ns_utf = _utf8(ns_uri)
else:
@@ -156,9 +165,13 @@
__FUNCTION_NAMESPACE_REGISTRIES = {}
def FunctionNamespace(ns_uri):
- """Retrieve the function namespace object associated with the given
- URI. Creates a new one if it does not yet exist. A function namespace can
- only be used to register extension functions."""
+ """FunctionNamespace(ns_uri)
+
+ Retrieve the function namespace object associated with the given
+ URI.
+
+ Creates a new one if it does not yet exist. A function namespace
+ can only be used to register extension functions."""
if ns_uri:
ns_utf = _utf8(ns_uri)
else:
Modified: lxml/branch/lxml-2.0/src/lxml/objectpath.pxi
==============================================================================
--- lxml/branch/lxml-2.0/src/lxml/objectpath.pxi (original)
+++ lxml/branch/lxml-2.0/src/lxml/objectpath.pxi Thu Feb 14 15:52:28 2008
@@ -8,7 +8,8 @@
cdef class ObjectPath:
- """Immutable object that represents a compiled object path.
+ """ObjectPath(path)
+ Immutable object that represents a compiled object path.
Example for a path: 'root.child[1].{other}child[25]'
"""
@@ -54,6 +55,7 @@
default, use_default)
def hasattr(self, _Element root not None):
+ "hasattr(self, root)"
try:
_findObjectPath(root, self._c_path, self._path_len, None, 0)
except AttributeError:
@@ -61,14 +63,18 @@
return True
def setattr(self, _Element root not None, value):
- """Set the value of the target element in a subtree.
+ """setattr(self, root, value)
+
+ Set the value of the target element in a subtree.
If any of the children on the path does not exist, it is created.
"""
_createObjectPath(root, self._c_path, self._path_len, 1, value)
def addattr(self, _Element root not None, value):
- """Append a value to the target element in a subtree.
+ """addattr(self, root, value)
+
+ Append a value to the target element in a subtree.
If any of the children on the path does not exist, it is created.
"""
Modified: lxml/branch/lxml-2.0/src/lxml/parser.pxi
==============================================================================
--- lxml/branch/lxml-2.0/src/lxml/parser.pxi (original)
+++ lxml/branch/lxml-2.0/src/lxml/parser.pxi Thu Feb 14 15:52:28 2008
@@ -668,6 +668,7 @@
return context._error_log.copy()
property resolvers:
+ "The custom resolver registry of this parser."
def __get__(self):
return self._resolvers
@@ -681,7 +682,9 @@
self.set_element_class_lookup(lookup)
def set_element_class_lookup(self, ElementClassLookup lookup = None):
- """Set a lookup scheme for element classes generated from this parser.
+ """set_element_class_lookup(self, lookup = None)
+
+ Set a lookup scheme for element classes generated from this parser.
Reset it by passing None or nothing.
"""
@@ -702,11 +705,16 @@
return parser
def copy(self):
- "Create a new parser with the same configuration."
+ """copy(self)
+
+ Create a new parser with the same configuration.
+ """
return self._copy()
def makeelement(self, _tag, attrib=None, nsmap=None, **_extra):
- """Creates a new element associated with this parser.
+ """makeelement(self, _tag, attrib=None, nsmap=None, **_extra)
+
+ Creates a new element associated with this parser.
"""
return _makeElement(_tag, NULL, None, self, None, None,
attrib, nsmap, _extra)
@@ -861,7 +869,9 @@
return context._error_log.copy()
def feed(self, data):
- """Feeds data to the parser. The argument should be an 8-bit string
+ """feed(self, data)
+
+ Feeds data to the parser. The argument should be an 8-bit string
buffer containing encoded data, although Unicode is supported as long
as both string types are not mixed.
@@ -942,7 +952,9 @@
context.cleanup()
def close(self):
- """Terminates feeding data to this parser. This tells the parser to
+ """close(self)
+
+ Terminates feeding data to this parser. This tells the parser to
process any remaining data in the feed buffer, and then returns the
root Element of the tree that was parsed.
@@ -1303,7 +1315,8 @@
############################################################
cdef class TreeBuilder(_SaxParserTarget):
- """Parser target that builds a tree.
+ """TreeBuilder(self, element_factory=None, parser=None)
+ Parser target that builds a tree.
The final tree is returned by the ``close()`` method.
"""
@@ -1343,7 +1356,9 @@
# Python level event handlers
def close(self):
- """Flushes the builder buffers, and returns the toplevel document
+ """close(self)
+
+ Flushes the builder buffers, and returns the toplevel document
element.
"""
assert python.PyList_GET_SIZE(self._element_stack) == 0, "missing end tags"
@@ -1351,19 +1366,27 @@
return self._last
def data(self, data):
- """Adds text to the current element. The value should be either an
+ """data(self, data)
+
+ Adds text to the current element. The value should be either an
8-bit string containing ASCII text, or a Unicode string.
"""
self._handleSaxData(data)
def start(self, tag, attrs, nsmap=None):
- "Opens a new element."
+ """start(self, tag, attrs, nsmap=None)
+
+ Opens a new element.
+ """
if nsmap is None:
nsmap = EMPTY_READ_ONLY_DICT
return self._handleSaxStart(tag, attrs, nsmap)
def end(self, tag):
- "Closes the current element."
+ """end(self, tag)
+
+ Closes the current element.
+ """
element = self._handleSaxEnd(tag)
assert self._last.tag == tag,\
"end tag mismatch (expected %s, got %s)" % (
@@ -1371,9 +1394,13 @@
return element
def pi(self, target, data):
+ """pi(self, target, data)
+ """
return self._handleSaxPi(target, data)
def comment(self, comment):
+ """comment(self, comment)
+ """
return self._handleSaxComment(comment)
# internal SAX event handlers
@@ -1432,33 +1459,38 @@
)
cdef class XMLParser(_FeedParser):
- """The XML parser. Parsers can be supplied as additional argument to
- various parse functions of the lxml API. A default parser is always
- available and can be replaced by a call to the global function
- 'set_default_parser'. New parsers can be created at any time without a
- major run-time overhead.
+ """XMLParser(self, attribute_defaults=False, dtd_validation=False, load_dtd=False, no_network=True, ns_clean=False, recover=False, remove_blank_text=False, compact=True, resolve_entities=True, remove_comments=False, remove_pis=False, target=None, encoding=None, schema=None)
+ The XML parser.
+
+ Parsers can be supplied as additional argument to various parse
+ functions of the lxml API. A default parser is always available
+ and can be replaced by a call to the global function
+ 'set_default_parser'. New parsers can be created at any time
+ without a major run-time overhead.
The keyword arguments in the constructor are mainly based on the libxml2
parser configuration. A DTD will also be loaded if validation or
attribute default values are requested.
Available boolean keyword arguments:
- * attribute_defaults - read default attributes from DTD
- * dtd_validation - validate (if DTD is available)
- * load_dtd - use DTD for parsing
- * no_network - prevent network access for related files (default: True)
- * ns_clean - clean up redundant namespace declarations
- * recover - try hard to parse through broken XML
- * remove_blank_text - discard blank text nodes
- * remove_comments - discard comments
- * remove_pis - discard processing instructions
- * compact - safe memory for short text content (default: True)
- * resolve_entities - replace entities by their text value (default: True)
+
+ - attribute_defaults - read default attributes from DTD
+ - dtd_validation - validate (if DTD is available)
+ - load_dtd - use DTD for parsing
+ - no_network - prevent network access for related files (default: True)
+ - ns_clean - clean up redundant namespace declarations
+ - recover - try hard to parse through broken XML
+ - remove_blank_text - discard blank text nodes
+ - remove_comments - discard comments
+ - remove_pis - discard processing instructions
+ - compact - save memory for short text content (default: True)
+ - resolve_entities - replace entities by their text value (default: True)
Other keyword arguments:
- * encoding - override the document encoding
- * target - a parser target object that will receive the parse events
- * schema - an XMLSchema to validate against
+
+ - encoding - override the document encoding
+ - target - a parser target object that will receive the parse events
+ - schema - an XMLSchema to validate against
Note that you should avoid sharing parsers between threads. While this is
not harmful, it is more efficient to use separate parsers. This does not
@@ -1498,8 +1530,10 @@
target, None, encoding)
cdef class ETCompatXMLParser(XMLParser):
- """An XML parser with an ElementTree compatible default setup. See the
- XMLParser class for details.
+ """ETCompatXMLParser(self, attribute_defaults=False, dtd_validation=False, load_dtd=False, no_network=True, ns_clean=False, recover=False, remove_blank_text=False, compact=True, resolve_entities=True, remove_comments=True, remove_pis=True, target=None, encoding=None, schema=None)
+ An XML parser with an ElementTree compatible default setup.
+
+ See the XMLParser class for details.
This parser has ``remove_comments`` and ``remove_pis`` enabled by default
and thus ignores comments and processing instructions.
@@ -1532,15 +1566,17 @@
__GLOBAL_PARSER_CONTEXT.setDefaultParser(__DEFAULT_XML_PARSER)
def setDefaultParser(parser=None):
- "@deprecated: please use set_default_parser instead."
+ ":deprecated: please use set_default_parser instead."
set_default_parser(parser)
def getDefaultParser():
- "@deprecated: please use get_default_parser instead."
+ ":deprecated: please use get_default_parser instead."
return get_default_parser()
def set_default_parser(_BaseParser parser=None):
- """Set a default parser for the current thread. This parser is used
+ """set_default_parser(parser=None)
+
+ Set a default parser for the current thread. This parser is used
globally whenever no parser is supplied to the various parse functions of
the lxml API. If this function is called without a parser (or if it is
None), the default parser is reset to the original configuration.
@@ -1554,6 +1590,7 @@
__GLOBAL_PARSER_CONTEXT.setDefaultParser(parser)
def get_default_parser():
+ "get_default_parser()"
return __GLOBAL_PARSER_CONTEXT.getDefaultParser()
############################################################
@@ -1568,22 +1605,28 @@
)
cdef class HTMLParser(_FeedParser):
- """The HTML parser. This parser allows reading HTML into a normal XML
- tree. By default, it can read broken (non well-formed) HTML, depending on
- the capabilities of libxml2. Use the 'recover' option to switch this off.
+ """HTMLParser(self, recover=True, no_network=True, remove_blank_text=False, compact=True, remove_comments=False, remove_pis=False, target=None, encoding=None, schema=None)
+ The HTML parser.
+
+ This parser allows reading HTML into a normal XML tree. By
+ default, it can read broken (non well-formed) HTML, depending on
+ the capabilities of libxml2. Use the 'recover' option to switch
+ this off.
Available boolean keyword arguments:
- * recover - try hard to parse through broken HTML (default: True)
- * no_network - prevent network access for related files (default: True)
- * remove_blank_text - discard empty text nodes
- * remove_comments - discard comments
- * remove_pis - discard processing instructions
- * compact - safe memory for short text content (default: True)
+
+ - recover - try hard to parse through broken HTML (default: True)
+ - no_network - prevent network access for related files (default: True)
+ - remove_blank_text - discard empty text nodes
+ - remove_comments - discard comments
+ - remove_pis - discard processing instructions
+ - compact - save memory for short text content (default: True)
Other keyword arguments:
- * encoding - override the document encoding
- * target - a parser target object that will receive the parse events
- * schema - an XMLSchema to validate against
+
+ - encoding - override the document encoding
+ - target - a parser target object that will receive the parse events
+ - schema - an XMLSchema to validate against
Note that you should avoid sharing parsers between threads for performance
reasons.
Modified: lxml/branch/lxml-2.0/src/lxml/relaxng.pxi
==============================================================================
--- lxml/branch/lxml-2.0/src/lxml/relaxng.pxi (original)
+++ lxml/branch/lxml-2.0/src/lxml/relaxng.pxi Thu Feb 14 15:52:28 2008
@@ -20,7 +20,8 @@
# RelaxNG
cdef class RelaxNG(_Validator):
- """Turn a document into a Relax NG validator.
+ """RelaxNG(self, etree=None, file=None)
+ Turn a document into a Relax NG validator.
Either pass a schema as Element or ElementTree, or pass a file or
filename through the ``file`` keyword argument.
@@ -91,7 +92,9 @@
relaxng.xmlRelaxNGFree(self._c_schema)
def __call__(self, etree):
- """Validate doc using Relax NG.
+ """__call__(self, etree)
+
+ Validate doc using Relax NG.
Returns true if document is valid, false if not."""
cdef _Document doc
Modified: lxml/branch/lxml-2.0/src/lxml/sax.py
==============================================================================
--- lxml/branch/lxml-2.0/src/lxml/sax.py (original)
+++ lxml/branch/lxml-2.0/src/lxml/sax.py Thu Feb 14 15:52:28 2008
@@ -1,3 +1,15 @@
+"""
+SAX-based adapter to copy trees from/to the Python standard library.
+
+Use the `ElementTreeContentHandler` class to build an ElementTree from
+SAX events.
+
+Use the `ElementTreeProducer` class or the `saxify()` function to fire
+the SAX events of an ElementTree against a SAX ContentHandler.
+
+See http://codespeak.net/lxml/sax.html
+"""
+
from xml.sax.handler import ContentHandler
import etree
from etree import ElementTree, SubElement
@@ -220,4 +232,7 @@
return prefix + ':' + local_name
def saxify(element_or_tree, content_handler):
+ """One-shot helper to generate SAX events from an XML tree and fire
+ them against a SAX ContentHandler.
+ """
return ElementTreeProducer(element_or_tree, content_handler).saxify()
Modified: lxml/branch/lxml-2.0/src/lxml/schematron.pxi
==============================================================================
--- lxml/branch/lxml-2.0/src/lxml/schematron.pxi (original)
+++ lxml/branch/lxml-2.0/src/lxml/schematron.pxi Thu Feb 14 15:52:28 2008
@@ -66,14 +66,15 @@
# Schematron
cdef class Schematron(_Validator):
- """A Schematron validator.
+ """Schematron(self, etree=None, file=None)
+ A Schematron validator.
Pass a root Element or an ElementTree to turn it into a validator.
Alternatively, pass a filename as keyword argument 'file' to parse from
the file system.
"""
cdef schematron.xmlSchematron* _c_schema
- def __init__(self, etree=None, file=None):
+ def __init__(self, etree=None, *, file=None):
cdef _Document doc
cdef _Element root_node
cdef xmlNode* c_node
@@ -120,7 +121,9 @@
schematron.xmlSchematronFree(self._c_schema)
def __call__(self, etree):
- """Validate doc using Schematron.
+ """__call__(self, etree)
+
+ Validate doc using Schematron.
Returns true if document is valid, false if not."""
cdef _Document doc
Modified: lxml/branch/lxml-2.0/src/lxml/tests/__init__.py
==============================================================================
--- lxml/branch/lxml-2.0/src/lxml/tests/__init__.py (original)
+++ lxml/branch/lxml-2.0/src/lxml/tests/__init__.py Thu Feb 14 15:52:28 2008
@@ -1,2 +1,4 @@
-# this is a package
+"""
+The lxml test suite for lxml, ElementTree and cElementTree.
+"""
Modified: lxml/branch/lxml-2.0/src/lxml/tests/common_imports.py
==============================================================================
--- lxml/branch/lxml-2.0/src/lxml/tests/common_imports.py (original)
+++ lxml/branch/lxml-2.0/src/lxml/tests/common_imports.py Thu Feb 14 15:52:28 2008
@@ -5,6 +5,15 @@
from lxml import etree
+def make_version_tuple(version_string):
+ l = []
+ for part in re.findall('([0-9]+|[^0-9.]+)', version_string):
+ try:
+ l.append(int(part))
+ except ValueError:
+ l.append(part)
+ return tuple(l)
+
try:
from elementtree import ElementTree # standard ET
except ImportError:
@@ -14,7 +23,7 @@
ElementTree = None
if hasattr(ElementTree, 'VERSION'):
- if tuple(ElementTree.VERSION.split('.')) < (1,3):
+ if make_version_tuple(ElementTree.VERSION)[:2] < (1,3):
# compatibility tests require ET 1.3+
ElementTree = None
@@ -27,8 +36,8 @@
cElementTree = None
if hasattr(cElementTree, 'VERSION'):
- if tuple(cElementTree.VERSION.split('.')) < (1,0,7):
- # compatibility tests require cET 1.0.7+
+ if make_version_tuple(cElementTree.VERSION)[:2] <= (1,0):
+ # compatibility tests do not run with cET 1.0.7
cElementTree = None
try:
Modified: lxml/branch/lxml-2.0/src/lxml/tests/test_elementtree.py
==============================================================================
--- lxml/branch/lxml-2.0/src/lxml/tests/test_elementtree.py (original)
+++ lxml/branch/lxml-2.0/src/lxml/tests/test_elementtree.py Thu Feb 14 15:52:28 2008
@@ -16,7 +16,7 @@
if cElementTree is not None:
if tuple([int(n) for n in
- getattr(cElementTree, "VERSION", "0.0").split(".")]) <= (1,0,6):
+ getattr(cElementTree, "VERSION", "0.0").split(".")]) <= (1,0,7):
cElementTree = None
try:
@@ -2006,6 +2006,50 @@
[d, c, b],
list(a))
+ def test_setslice_all_replace_reversed_ns1(self):
+ Element = self.etree.Element
+ SubElement = self.etree.SubElement
+
+ a = Element('{ns}a')
+ b = SubElement(a, '{ns}b', {'{ns1}a1': 'test'})
+ c = SubElement(a, '{ns}c', {'{ns2}a2': 'test'})
+ d = SubElement(a, '{ns}d', {'{ns3}a3': 'test'})
+
+ s = [d, c, b]
+ a[:] = s
+ self.assertEquals(
+ [d, c, b],
+ list(a))
+ self.assertEquals(
+ ['{ns}d', '{ns}c', '{ns}b'],
+ [ child.tag for child in a ])
+
+ self.assertEquals(
+ [['{ns3}a3'], ['{ns2}a2'], ['{ns1}a1']],
+ [ child.attrib.keys() for child in a ])
+
+ def test_setslice_all_replace_reversed_ns2(self):
+ Element = self.etree.Element
+ SubElement = self.etree.SubElement
+
+ a = Element('{ns}a')
+ b = SubElement(a, '{ns1}b', {'{ns}a1': 'test'})
+ c = SubElement(a, '{ns2}c', {'{ns}a2': 'test'})
+ d = SubElement(a, '{ns3}d', {'{ns}a3': 'test'})
+
+ s = [d, c, b]
+ a[:] = s
+ self.assertEquals(
+ [d, c, b],
+ list(a))
+ self.assertEquals(
+ ['{ns3}d', '{ns2}c', '{ns1}b'],
+ [ child.tag for child in a ])
+
+ self.assertEquals(
+ [['{ns}a3'], ['{ns}a2'], ['{ns}a1']],
+ [ child.attrib.keys() for child in a ])
+
def test_setslice_end(self):
Element = self.etree.Element
SubElement = self.etree.SubElement
Modified: lxml/branch/lxml-2.0/src/lxml/tests/test_objectify.py
==============================================================================
--- lxml/branch/lxml-2.0/src/lxml/tests/test_objectify.py (original)
+++ lxml/branch/lxml-2.0/src/lxml/tests/test_objectify.py Thu Feb 14 15:52:28 2008
@@ -427,9 +427,61 @@
self.assertRaises(TypeError, setattr, root.c1.c2, 'text', "test")
self.assertRaises(TypeError, setattr, root.c1.c2, 'pyval', "test")
- def test_setslice(self):
+ # slicing
+
+ def test_getslice_complete(self):
+ root = self.XML("c1c2")
+ self.assertEquals(["c1", "c2"],
+ [ c.text for c in root.c[:] ])
+
+ def test_getslice_partial(self):
+ root = self.XML("c1c2c3c4")
+ test_list = ["c1", "c2", "c3", "c4"]
+
+ self.assertEquals(test_list,
+ [ c.text for c in root.c[:] ])
+ self.assertEquals(test_list[1:2],
+ [ c.text for c in root.c[1:2] ])
+ self.assertEquals(test_list[-3:-1],
+ [ c.text for c in root.c[-3:-1] ])
+ self.assertEquals(test_list[-3:3],
+ [ c.text for c in root.c[-3:3] ])
+ self.assertEquals(test_list[-3000:3],
+ [ c.text for c in root.c[-3000:3] ])
+ self.assertEquals(test_list[-3:3000],
+ [ c.text for c in root.c[-3:3000] ])
+
+ def test_getslice_partial_neg(self):
+ root = self.XML("c1c2c3c4")
+ test_list = ["c1", "c2", "c3", "c4"]
+
+ self.assertEquals(test_list,
+ [ c.text for c in root.c[:] ])
+ self.assertEquals(test_list[2:1:-1],
+ [ c.text for c in root.c[2:1:-1] ])
+ self.assertEquals(test_list[-1:-3:-1],
+ [ c.text for c in root.c[-1:-3:-1] ])
+ self.assertEquals(test_list[2:-3:-1],
+ [ c.text for c in root.c[2:-3:-1] ])
+ self.assertEquals(test_list[2:-3000:-1],
+ [ c.text for c in root.c[2:-3000:-1] ])
+
+ # slice assignment
+
+ def test_setslice_complete(self):
+ Element = self.Element
+ root = Element("root")
+ root.c = ["c1", "c2"]
+
+ c1 = root.c[0]
+ c2 = root.c[1]
+
+ self.assertEquals([c1,c2], list(root.c))
+ self.assertEquals(["c1", "c2"],
+ [ c.text for c in root.c ])
+
+ def test_setslice_elements(self):
Element = self.Element
- SubElement = self.etree.SubElement
root = Element("root")
root.c = ["c1", "c2"]
@@ -455,10 +507,143 @@
self.assertEquals(["c1", "c2", "c2", "c1"],
[ c.text for c in root.c ])
+ def test_setslice_partial(self):
+ Element = self.Element
+ root = Element("root")
+ l = ["c1", "c2", "c3", "c4"]
+ root.c = l
+
+ self.assertEquals(["c1", "c2", "c3", "c4"],
+ [ c.text for c in root.c ])
+ self.assertEquals(l,
+ [ c.text for c in root.c ])
+
+ new_slice = ["cA", "cB"]
+ l[1:2] = new_slice
+ root.c[1:2] = new_slice
+
+ self.assertEquals(["c1", "cA", "cB", "c3", "c4"], l)
+ self.assertEquals(["c1", "cA", "cB", "c3", "c4"],
+ [ c.text for c in root.c ])
+ self.assertEquals(l,
+ [ c.text for c in root.c ])
+
+ def test_setslice_insert(self):
+ Element = self.Element
+ root = Element("root")
+ l = ["c1", "c2", "c3", "c4"]
+ root.c = l
+
+ self.assertEquals(["c1", "c2", "c3", "c4"],
+ [ c.text for c in root.c ])
+ self.assertEquals(l,
+ [ c.text for c in root.c ])
+
+ new_slice = ["cA", "cB"]
+ l[1:1] = new_slice
+ root.c[1:1] = new_slice
+
+ self.assertEquals(["c1", "cA", "cB", "c2", "c3", "c4"], l)
+ self.assertEquals(["c1", "cA", "cB", "c2", "c3", "c4"],
+ [ c.text for c in root.c ])
+ self.assertEquals(l,
+ [ c.text for c in root.c ])
+
+ def test_setslice_insert_neg(self):
+ Element = self.Element
+ root = Element("root")
+ l = ["c1", "c2", "c3", "c4"]
+ root.c = l
+
+ self.assertEquals(["c1", "c2", "c3", "c4"],
+ [ c.text for c in root.c ])
+ self.assertEquals(l,
+ [ c.text for c in root.c ])
+
+ new_slice = ["cA", "cB"]
+ l[-2:-2] = new_slice
+ root.c[-2:-2] = new_slice
+
+ self.assertEquals(["c1", "c2", "cA", "cB", "c3", "c4"], l)
+ self.assertEquals(["c1", "c2", "cA", "cB", "c3", "c4"],
+ [ c.text for c in root.c ])
+ self.assertEquals(l,
+ [ c.text for c in root.c ])
+
+ def test_setslice_empty(self):
+ Element = self.Element
+ root = Element("root")
+
+ root.c = []
+ self.assertRaises(
+ AttributeError, getattr, root, 'c')
+
+ def test_setslice_partial_wrong_length(self):
+ Element = self.Element
+ root = Element("root")
+ l = ["c1", "c2", "c3", "c4"]
+ root.c = l
+
+ self.assertEquals(["c1", "c2", "c3", "c4"],
+ [ c.text for c in root.c ])
+ self.assertEquals(l,
+ [ c.text for c in root.c ])
+
+ new_slice = ["cA", "cB", "cC"]
+ self.assertRaises(
+ ValueError, operator.setitem,
+ l, slice(1,2,-1), new_slice)
+ self.assertRaises(
+ ValueError, operator.setitem,
+ root.c, slice(1,2,-1), new_slice)
+
+ def test_setslice_partial_neg(self):
+ Element = self.Element
+ root = Element("root")
+ l = ["c1", "c2", "c3", "c4"]
+ root.c = l
+
+ self.assertEquals(["c1", "c2", "c3", "c4"],
+ [ c.text for c in root.c ])
+ self.assertEquals(l,
+ [ c.text for c in root.c ])
+
+ new_slice = ["cA", "cB"]
+ l[-1:1:-1] = new_slice
+ root.c[-1:1:-1] = new_slice
+
+ self.assertEquals(["c1", "c2", "cB", "cA"], l)
+ self.assertEquals(["c1", "c2", "cB", "cA"],
+ [ c.text for c in root.c ])
+ self.assertEquals(l,
+ [ c.text for c in root.c ])
+
+ def test_setslice_partial_allneg(self):
+ Element = self.Element
+ root = Element("root")
+ l = ["c1", "c2", "c3", "c4"]
+ root.c = l
+
+ self.assertEquals(["c1", "c2", "c3", "c4"],
+ [ c.text for c in root.c ])
+ self.assertEquals(l,
+ [ c.text for c in root.c ])
+
+ new_slice = ["cA", "cB"]
+ l[-1:-4:-2] = new_slice
+ root.c[-1:-4:-2] = new_slice
+
+ self.assertEquals(["c1", "cB", "c3", "cA"], l)
+ self.assertEquals(["c1", "cB", "c3", "cA"],
+ [ c.text for c in root.c ])
+ self.assertEquals(l,
+ [ c.text for c in root.c ])
+
+ # other stuff
+
def test_set_string(self):
# make sure strings are not handled as sequences
Element = self.Element
- SubElement = self.etree.SubElement
root = Element("root")
root.c = "TEST"
self.assertEquals(["TEST"],
@@ -467,7 +652,6 @@
def test_setitem_string(self):
# make sure strings are set as children
Element = self.Element
- SubElement = self.etree.SubElement
root = Element("root")
root["c"] = "TEST"
self.assertEquals(["TEST"],
@@ -476,7 +660,6 @@
def test_setitem_string_special(self):
# make sure 'text' etc. are set as children
Element = self.Element
- SubElement = self.etree.SubElement
root = Element("root")
root["text"] = "TEST"
Modified: lxml/branch/lxml-2.0/src/lxml/tests/test_pyclasslookup.py
==============================================================================
--- lxml/branch/lxml-2.0/src/lxml/tests/test_pyclasslookup.py (original)
+++ lxml/branch/lxml-2.0/src/lxml/tests/test_pyclasslookup.py Thu Feb 14 15:52:28 2008
@@ -245,6 +245,57 @@
self.assertEquals([ c.tag for c in root.getchildren() ],
child_tags)
+ def test_lookup_iter_children(self):
+ el_class = self._buildElementClass()
+ el_class.CHILD_TAGS = None
+ def lookup(doc, el):
+ if el_class.CHILD_TAGS is None:
+ el_class.CHILD_TAGS = [ c.tag for c in el ]
+ return el_class
+ self._setClassLookup(lookup)
+ root = self.XML(xml_str)
+ child_tags = root.CHILD_TAGS
+ self.assertNotEquals(None, child_tags)
+ self.assertEquals([ c.tag for c in root.getchildren() ],
+ child_tags)
+
+ def test_lookup_iterchildren(self):
+ el_class = self._buildElementClass()
+ el_class.CHILD_TAGS = None
+ def lookup(doc, el):
+ if el_class.CHILD_TAGS is None:
+ el_class.CHILD_TAGS = [ c.tag for c in el.iterchildren() ]
+ return el_class
+ self._setClassLookup(lookup)
+ root = self.XML(xml_str)
+ child_tags = root.CHILD_TAGS
+ self.assertNotEquals(None, child_tags)
+ self.assertEquals([ c.tag for c in root.getchildren() ],
+ child_tags)
+
+ def test_lookup_iterchildren_tag(self):
+ el_class = self._buildElementClass()
+ el_class.CHILD_TAGS = None
+ def lookup(doc, el):
+ if not el_class.CHILD_TAGS:
+ el_class.CHILD_TAGS = [
+ c.tag for c in el.iterchildren(tag='{objectified}c2') ]
+ return el_class
+ self._setClassLookup(lookup)
+
+ root = self.XML(xml_str)
+ child_tags = root.CHILD_TAGS
+ self.assertNotEquals(None, child_tags)
+ self.assertEquals([], child_tags)
+
+ c1 = root[0]
+ child_tags = root.CHILD_TAGS
+ self.assertNotEquals(None, child_tags)
+ self.assertNotEquals([], child_tags)
+ self.assertEquals(
+ [ c.tag for c in root[0].iterchildren(tag='{objectified}c2') ],
+ child_tags)
+
def test_lookup_getparent(self):
el_class = self._buildElementClass()
el_class.PARENT = None
Modified: lxml/branch/lxml-2.0/src/lxml/usedoctest.py
==============================================================================
--- lxml/branch/lxml-2.0/src/lxml/usedoctest.py (original)
+++ lxml/branch/lxml-2.0/src/lxml/usedoctest.py Thu Feb 14 15:52:28 2008
@@ -1,3 +1,13 @@
+"""Doctest module for XML comparison.
+
+Usage::
+
+ >>> import lxml.usedoctest
+ >>> # now do your XML doctests ...
+
+See `lxml.doctestcompare`
+"""
+
from lxml import doctestcompare
doctestcompare.temp_install(del_module=__name__)
Modified: lxml/branch/lxml-2.0/src/lxml/xinclude.pxi
==============================================================================
--- lxml/branch/lxml-2.0/src/lxml/xinclude.pxi (original)
+++ lxml/branch/lxml-2.0/src/lxml/xinclude.pxi Thu Feb 14 15:52:28 2008
@@ -8,7 +8,8 @@
pass
cdef class XInclude:
- """XInclude processor.
+ """XInclude(self)
+ XInclude processor.
Create an instance and call it on an Element to run XInclude
processing.
@@ -22,6 +23,7 @@
return self._error_log.copy()
def __call__(self, _Element node not None):
+ "__call__(self, node)"
# We cannot pass the XML_PARSE_NOXINCNODE option as this would free
# the XInclude nodes - there may still be Python references to them!
# Therefore, we allow XInclude nodes to be converted to
Modified: lxml/branch/lxml-2.0/src/lxml/xmlerror.pxi
==============================================================================
--- lxml/branch/lxml-2.0/src/lxml/xmlerror.pxi (original)
+++ lxml/branch/lxml-2.0/src/lxml/xmlerror.pxi Thu Feb 14 15:52:28 2008
@@ -5,7 +5,9 @@
# module level API functions
def clear_error_log():
- """Clear the global error log. Note that this log is already bound to a
+ """clear_error_log()
+
+ Clear the global error log. Note that this log is already bound to a
fixed size.
"""
__GLOBAL_ERROR_LOG.clear()
@@ -14,7 +16,7 @@
"""Clear the global error log. Note that this log is already bound to a
fixed size.
- @deprecated: use ``clear_error_log()`` instead.
+ :deprecated: use ``clear_error_log()`` instead.
"""
__GLOBAL_ERROR_LOG.clear()
@@ -233,8 +235,10 @@
return _ListErrorLog(filtered, None, None)
def filter_types(self, types):
- """Filter the errors by the given types and return a new error log
- containing the matches.
+ """filter_types(self, types)
+
+ Filter the errors by the given types and return a new error
+ log containing the matches.
"""
cdef _LogEntry entry
if not python.PySequence_Check(types):
@@ -246,8 +250,10 @@
return _ListErrorLog(filtered, None, None)
def filter_levels(self, levels):
- """Filter the errors by the given error levels and return a new error
- log containing the matches.
+ """filter_levels(self, levels)
+
+ Filter the errors by the given error levels and return a new
+ error log containing the matches.
"""
cdef _LogEntry entry
if not python.PySequence_Check(levels):
@@ -259,7 +265,10 @@
return _ListErrorLog(filtered, None, None)
def filter_from_level(self, level):
- "Return a log with all messages of the requested level of worse."
+ """filter_from_level(self, level)
+
+ Return a log with all messages of the requested level or worse.
+ """
cdef _LogEntry entry
filtered = []
for entry in self._entries:
@@ -268,15 +277,24 @@
return _ListErrorLog(filtered, None, None)
def filter_from_fatals(self):
- "Convenience method to get all fatal error messages."
+ """filter_from_fatals(self)
+
+ Convenience method to get all fatal error messages.
+ """
return self.filter_from_level(ErrorLevels.FATAL)
def filter_from_errors(self):
- "Convenience method to get all error messages or worse."
+ """filter_from_errors(self)
+
+ Convenience method to get all error messages or worse.
+ """
return self.filter_from_level(ErrorLevels.ERROR)
def filter_from_warnings(self):
- "Convenience method to get all warnings or worse."
+ """filter_from_warnings(self)
+
+ Convenience method to get all warnings or worse.
+ """
return self.filter_from_level(ErrorLevels.WARNING)
cdef class _ErrorLog(_ListErrorLog):
@@ -331,7 +349,8 @@
python.PyList_Append(entries, entry)
cdef class PyErrorLog(_BaseErrorLog):
- """A global error log that connects to the Python stdlib logging package.
+ """PyErrorLog(self, logger_name=None)
+ A global error log that connects to the Python stdlib logging package.
The constructor accepts an optional logger name.
@@ -395,12 +414,14 @@
Note that this disables access to the global error log from exceptions.
Parsers, XSLT etc. will continue to provide their normal local error log.
- @deprecated: use ``use_global_python_log()`` instead.
+ :deprecated: use ``use_global_python_log()`` instead.
"""
use_global_python_log(log)
def use_global_python_log(PyErrorLog log not None):
- """Replace the global error log by an etree.PyErrorLog that uses the
+ """use_global_python_log(log)
+
+ Replace the global error log by an etree.PyErrorLog that uses the
standard Python logging package.
Note that this disables access to the global error log from exceptions.
Modified: lxml/branch/lxml-2.0/src/lxml/xmlid.pxi
==============================================================================
--- lxml/branch/lxml-2.0/src/lxml/xmlid.pxi (original)
+++ lxml/branch/lxml-2.0/src/lxml/xmlid.pxi Thu Feb 14 15:52:28 2008
@@ -1,7 +1,9 @@
cdef object _find_id_attributes
def XMLID(text):
- """Parse the text and return a tuple (root node, ID dictionary). The root
+ """XMLID(text)
+
+ Parse the text and return a tuple (root node, ID dictionary). The root
node is the same as returned by the XML() function. The dictionary
contains string-element pairs. The dictionary keys are the values of 'id'
attributes. The elements referenced by the ID are stored as dictionary
@@ -19,7 +21,9 @@
return (root, dic)
def XMLDTDID(text):
- """Parse the text and return a tuple (root node, ID dictionary). The root
+ """XMLDTDID(text)
+
+ Parse the text and return a tuple (root node, ID dictionary). The root
node is the same as returned by the XML() function. The dictionary
contains string-element pairs. The dictionary keys are the values of ID
attributes as defined by the DTD. The elements referenced by the ID are
@@ -37,7 +41,9 @@
return (root, _IDDict(root))
def parseid(source, parser=None):
- """Parses the source into a tuple containing an ElementTree object and an
+ """parseid(source, parser=None)
+
+ Parses the source into a tuple containing an ElementTree object and an
ID dictionary. If no parser is provided as second argument, the default
parser is used.
@@ -49,7 +55,8 @@
return (_elementTreeFactory(doc, None), _IDDict(doc))
cdef class _IDDict:
- """A dictionary-like proxy class that mapps ID attributes to elements.
+ """IDDict(self, etree)
+ A dictionary-like proxy class that maps ID attributes to elements.
The dictionary must be instantiated with the root element of a parsed XML
document, otherwise the behaviour is undefined. Elements and XML trees
Modified: lxml/branch/lxml-2.0/src/lxml/xmlschema.pxi
==============================================================================
--- lxml/branch/lxml-2.0/src/lxml/xmlschema.pxi (original)
+++ lxml/branch/lxml-2.0/src/lxml/xmlschema.pxi Thu Feb 14 15:52:28 2008
@@ -20,7 +20,8 @@
# XMLSchema
cdef class XMLSchema(_Validator):
- """Turn a document into an XML Schema validator.
+ """XMLSchema(self, etree=None, file=None)
+ Turn a document into an XML Schema validator.
Either pass a schema as Element or ElementTree, or pass a file or
filename through the ``file`` keyword argument.
@@ -83,7 +84,9 @@
xmlschema.xmlSchemaFree(self._c_schema)
def __call__(self, etree):
- """Validate doc using XML Schema.
+ """__call__(self, etree)
+
+ Validate doc using XML Schema.
Returns true if document is valid, false if not.
"""
Modified: lxml/branch/lxml-2.0/src/lxml/xpath.pxi
==============================================================================
--- lxml/branch/lxml-2.0/src/lxml/xpath.pxi (original)
+++ lxml/branch/lxml-2.0/src/lxml/xpath.pxi Thu Feb 14 15:52:28 2008
@@ -127,13 +127,17 @@
self._context.set_context(xpathCtxt)
def evaluate(self, _eval_arg, **_variables):
- """Evaluate an XPath expression.
+ """evaluate(self, _eval_arg, **_variables)
+
+ Evaluate an XPath expression.
Instead of calling this method, you can also call the evaluator object
itself.
Variables may be provided as keyword arguments. Note that namespaces
are currently not supported for variables.
+
+ :deprecated: call the object, not its method.
"""
return self(_eval_arg, **_variables)
@@ -207,7 +211,8 @@
cdef class XPathElementEvaluator(_XPathEvaluatorBase):
- """Create an XPath evaluator for an element.
+ """XPathElementEvaluator(self, element, namespaces=None, extensions=None, regexp=True)
+ Create an XPath evaluator for an element.
Absolute XPath expressions (starting with '/') will be evaluated against
the ElementTree as returned by getroottree().
@@ -232,17 +237,34 @@
def registerNamespace(self, prefix, uri):
"""Register a namespace with the XPath context.
+
+ :deprecated: use ``register_namespace()`` instead
+ """
+ self._context.addNamespace(prefix, uri)
+
+ def register_namespace(self, prefix, uri):
+ """Register a namespace with the XPath context.
"""
self._context.addNamespace(prefix, uri)
def registerNamespaces(self, namespaces):
"""Register a prefix -> uri dict.
+
+ :deprecated: use ``register_namespaces()`` instead
+ """
+ for prefix, uri in namespaces.items():
+ self._context.addNamespace(prefix, uri)
+
+ def register_namespaces(self, namespaces):
+ """Register a prefix -> uri dict.
"""
for prefix, uri in namespaces.items():
self._context.addNamespace(prefix, uri)
def __call__(self, _path, **_variables):
- """Evaluate an XPath expression on the document.
+ """__call__(self, _path, **_variables)
+
+ Evaluate an XPath expression on the document.
Variables may be provided as keyword arguments. Note that namespaces
are currently not supported for variables.
@@ -276,7 +298,8 @@
cdef class XPathDocumentEvaluator(XPathElementEvaluator):
- """Create an XPath evaluator for an ElementTree.
+ """XPathDocumentEvaluator(self, etree, namespaces=None, extensions=None, regexp=True)
+ Create an XPath evaluator for an ElementTree.
Additional namespace declarations can be passed with the 'namespace'
keyword argument. EXSLT regular expression support can be disabled with
@@ -289,7 +312,9 @@
extensions=extensions, regexp=regexp)
def __call__(self, _path, **_variables):
- """Evaluate an XPath expression on the document.
+ """__call__(self, _path, **_variables)
+
+ Evaluate an XPath expression on the document.
Variables may be provided as keyword arguments. Note that namespaces
are currently not supported for variables.
@@ -327,7 +352,9 @@
def XPathEvaluator(etree_or_element, *, namespaces=None, extensions=None,
regexp=True):
- """Creates an XPath evaluator for an ElementTree or an Element.
+ """XPathEvaluator(etree_or_element, namespaces=None, extensions=None, regexp=True)
+
+ Creates an XPath evaluator for an ElementTree or an Element.
The resulting object can be called with an XPath expression as argument
and XPath variables provided as keyword arguments.
@@ -347,8 +374,8 @@
cdef class XPath(_XPathEvaluatorBase):
- """A compiled XPath expression that can be called on Elements and
- ElementTrees.
+ """XPath(self, path, namespaces=None, extensions=None, regexp=True)
+ A compiled XPath expression that can be called on Elements and ElementTrees.
Besides the XPath expression, you can pass prefix-namespace mappings and
extension functions to the constructor through the keyword arguments
@@ -374,6 +401,7 @@
self._raise_parse_error()
def __call__(self, _etree_or_element, **_variables):
+ "__call__(self, _etree_or_element, **_variables)"
cdef xpath.xmlXPathObject* xpathObj
cdef _Document document
cdef _Element element
@@ -414,8 +442,8 @@
_find_namespaces = re.compile('({[^}]+})').findall
cdef class ETXPath(XPath):
- """Special XPath class that supports the ElementTree {uri} notation for
- namespaces.
+ """ETXPath(self, path, extensions=None, regexp=True)
+ Special XPath class that supports the ElementTree {uri} notation for namespaces.
Note that this class does not accept the ``namespace`` keyword
argument. All namespaces must be passed as part of the path string.
Modified: lxml/branch/lxml-2.0/src/lxml/xslt.pxi
==============================================================================
--- lxml/branch/lxml-2.0/src/lxml/xslt.pxi (original)
+++ lxml/branch/lxml-2.0/src/lxml/xslt.pxi Thu Feb 14 15:52:28 2008
@@ -167,16 +167,20 @@
# XSLT file/network access control
cdef class XSLTAccessControl:
- """Access control for XSLT: reading/writing files, directories and network
- I/O. Access to a type of resource is granted or denied by passing any of
- the following keyword arguments. All of them default to True to allow
- access.
-
- * read_file
- * write_file
- * create_dir
- * read_network
- * write_network
+ """XSLTAccessControl(self, read_file=True, write_file=True, create_dir=True, read_network=True, write_network=True)
+
+ Access control for XSLT: reading/writing files, directories and
+ network I/O. Access to a type of resource is granted or denied by
+ passing any of the following boolean keyword arguments. All of
+ them default to True to allow access.
+
+ - read_file
+ - write_file
+ - create_dir
+ - read_network
+ - write_network
+
+ See `XSLT`.
"""
cdef xslt.xsltSecurityPrefs* _prefs
def __init__(self, *, read_file=True, write_file=True, create_dir=True,
@@ -252,16 +256,28 @@
cdef class XSLT:
- """Turn a document into an XSLT object.
+ """XSLT(self, xslt_input, extensions=None, regexp=True, access_control=None)
+
+ Turn an XSL document into an XSLT object.
+
+ Calling this object on a tree or Element will execute the XSLT::
+
+ >>> transform = etree.XSLT(xsl_tree)
+ >>> result = transform(xml_tree)
Keyword arguments of the constructor:
- * regexp - enable exslt regular expression support in XPath (default: True)
- * access_control - access restrictions for network or file system
- Keyword arguments of the XSLT run:
- * profile_run - enable XSLT profiling
+ - regexp: enable exslt regular expression support in XPath
+ (default: True)
+ - access_control: access restrictions for network or file
+ system (see `XSLTAccessControl`)
+
+ Keyword arguments of the XSLT call:
- Other keyword arguments are passed to the stylesheet.
+ - profile_run: enable XSLT profiling (default: False)
+
+ Other keyword arguments of the call are passed to the stylesheet
+ as parameters.
"""
cdef _XSLTContext _context
cdef xslt.xsltStylesheet* _c_style
@@ -328,14 +344,22 @@
xslt.xsltFreeStylesheet(self._c_style)
property error_log:
+ "The log of errors and warnings of an XSLT execution."
def __get__(self):
return self._error_log.copy()
def apply(self, _input, *, profile_run=False, **_kw):
+ """apply(self, _input, profile_run=False, **_kw)
+
+ :deprecated: call the object, not this method."""
return self(_input, profile_run=profile_run, **_kw)
def tostring(self, _ElementTree result_tree):
- """Save result doc to string based on stylesheet output method.
+ """tostring(self, result_tree)
+
+ Save result doc to string based on stylesheet output method.
+
+ :deprecated: use str(result_tree) instead.
"""
return str(result_tree)
@@ -346,6 +370,14 @@
return _copyXSLT(self)
def __call__(self, _input, *, profile_run=False, **_kw):
+ """__call__(self, _input, profile_run=False, **_kw)
+
+ Execute the XSL transformation on a tree or Element.
+
+ Pass the ``profile_run`` option to get profile information
+ about the XSLT. The result of the XSLT will have a property
+ xslt_profile that holds an XML tree with profiling data.
+ """
cdef _XSLTContext context
cdef _XSLTResolverContext resolver_context
cdef _Document input_doc
Modified: lxml/branch/lxml-2.0/version.txt
==============================================================================
--- lxml/branch/lxml-2.0/version.txt (original)
+++ lxml/branch/lxml-2.0/version.txt Thu Feb 14 15:52:28 2008
@@ -1 +1 @@
-2.0
+2.0.1
From scoder at codespeak.net Thu Feb 14 21:11:59 2008
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Thu, 14 Feb 2008 21:11:59 +0100 (CET)
Subject: [Lxml-checkins] r51494 - in lxml/trunk: . doc
Message-ID: <20080214201159.A687A16844B@codespeak.net>
Author: scoder
Date: Thu Feb 14 21:11:55 2008
New Revision: 51494
Modified:
lxml/trunk/ (props changed)
lxml/trunk/doc/objectify.txt
Log:
r3509 at delle: sbehnel | 2008-02-14 21:02:45 +0100
doc cleanup
Modified: lxml/trunk/doc/objectify.txt
==============================================================================
--- lxml/trunk/doc/objectify.txt (original)
+++ lxml/trunk/doc/objectify.txt Thu Feb 14 21:11:55 2008
@@ -16,31 +16,6 @@
used. Python data types are extracted from XML content automatically and made
available to the normal Python operators.
-To set up and use ``objectify``, you need both the ``lxml.etree`` module and
-``lxml.objectify``::
-
- >>> from lxml import etree
- >>> from lxml import objectify
-
-The objectify API is very different from the ElementTree API. If it
-is used, it should not be mixed with other element implementations
-(such as trees parsed with ``lxml.etree``), to avoid non-obvious
-behaviour.
-
-The `benchmark page`_ has some hints on performance optimisation of code using
-lxml.objectify.
-
-To make the doctests in this document look a little nicer, we also use this:
-
- >>> import lxml.usedoctest
-
-Imported from within a doctest, this relieves us from caring about the exact
-formatting of XML output.
-
-.. _Amara: http://uche.ogbuji.net/tech/4suite/amara/
-.. _gnosis.xml.objectify: http://gnosis.cx/download/
-.. _`benchmark page`: performance.html#lxml-objectify
-
.. contents::
..
1 The lxml.objectify API
@@ -61,6 +36,33 @@
5.5 Advanced element class lookup
6 What is different from lxml.etree?
+.. _Amara: http://uche.ogbuji.net/tech/4suite/amara/
+.. _gnosis.xml.objectify: http://gnosis.cx/download/
+.. _`benchmark page`: performance.html#lxml-objectify
+
+
+To set up and use ``objectify``, you need both the ``lxml.etree``
+module and ``lxml.objectify``::
+
+ >>> from lxml import etree
+ >>> from lxml import objectify
+
+The objectify API is very different from the ElementTree API. If it
+is used, it should not be mixed with other element implementations
+(such as trees parsed with ``lxml.etree``), to avoid non-obvious
+behaviour.
+
+The `benchmark page`_ has some hints on performance optimisation of
+code using lxml.objectify.
+
+To make the doctests in this document look a little nicer, we also use
+this::
+
+ >>> import lxml.usedoctest
+
+Imported from within a doctest, this relieves us from caring about the exact
+formatting of XML output.
+
The lxml.objectify API
======================
From scoder at codespeak.net Thu Feb 14 21:12:02 2008
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Thu, 14 Feb 2008 21:12:02 +0100 (CET)
Subject: [Lxml-checkins] r51495 - in lxml/trunk: . src/lxml
Message-ID: <20080214201202.2F5BE16844B@codespeak.net>
Author: scoder
Date: Thu Feb 14 21:12:01 2008
New Revision: 51495
Modified:
lxml/trunk/ (props changed)
lxml/trunk/src/lxml/lxml.objectify.pyx
Log:
r3510 at delle: sbehnel | 2008-02-14 21:04:10 +0100
cleanup of ObjectifiedElement.__delitem__()
Modified: lxml/trunk/src/lxml/lxml.objectify.pyx
==============================================================================
--- lxml/trunk/src/lxml/lxml.objectify.pyx (original)
+++ lxml/trunk/src/lxml/lxml.objectify.pyx Thu Feb 14 21:12:01 2008
@@ -317,26 +317,17 @@
def __delitem__(self, key):
cdef Py_ssize_t start, stop, step, slicelength
+ parent = self.getparent()
+ if parent is None:
+ raise TypeError("deleting items not supported by root element")
if python.PySlice_Check(key):
# slice deletion
- python.PySlice_GetIndicesEx(
- key, _countSiblings(self._c_node),
- &start, &stop, &step, &slicelength)
- parent = self.getparent()
- if parent is None:
- raise TypeError("deleting slices of root element not supported")
- if step < 0:
- del_items = list(self)[start:stop:step]
- else:
- del_items = list(islice(self, start, stop, step))
+ del_items = list(self)[key]
remove = parent.remove
for el in del_items:
remove(el)
else:
# normal index deletion
- parent = self.getparent()
- if parent is None:
- raise TypeError("deleting items not supported by root element")
sibling = self.__getitem__(key)
parent.remove(sibling)
From scoder at codespeak.net Thu Feb 14 21:12:06 2008
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Thu, 14 Feb 2008 21:12:06 +0100 (CET)
Subject: [Lxml-checkins] r51496 - lxml/trunk
Message-ID: <20080214201206.74C9016844E@codespeak.net>
Author: scoder
Date: Thu Feb 14 21:12:05 2008
New Revision: 51496
Modified:
lxml/trunk/ (props changed)
lxml/trunk/CHANGES.txt
Log:
r3511 at delle: sbehnel | 2008-02-14 21:10:41 +0100
changelog
Modified: lxml/trunk/CHANGES.txt
==============================================================================
--- lxml/trunk/CHANGES.txt (original)
+++ lxml/trunk/CHANGES.txt Thu Feb 14 21:12:05 2008
@@ -2,6 +2,21 @@
lxml changelog
==============
+2.0.2 (Under development)
+=========================
+
+Features added
+--------------
+
+Bugs fixed
+----------
+
+* Slice deletion bug fixed in objectify.
+
+Other changes
+-------------
+
+
2.0.1 (2008-02-13)
==================
From scoder at codespeak.net Fri Feb 15 10:22:28 2008
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Fri, 15 Feb 2008 10:22:28 +0100 (CET)
Subject: [Lxml-checkins] r51508 - in lxml/trunk: . doc
Message-ID: <20080215092228.D4B1016843E@codespeak.net>
Author: scoder
Date: Fri Feb 15 10:22:28 2008
New Revision: 51508
Modified:
lxml/trunk/ (props changed)
lxml/trunk/doc/build.txt
lxml/trunk/doc/pyrex.txt
Log:
r3515 at delle: sbehnel | 2008-02-15 08:27:28 +0100
doc fixes
Modified: lxml/trunk/doc/build.txt
==============================================================================
--- lxml/trunk/doc/build.txt (original)
+++ lxml/trunk/doc/build.txt Fri Feb 15 10:22:28 2008
@@ -44,10 +44,10 @@
want to be an lxml developer, then you do need a working Cython
installation. You can use EasyInstall_ to install it::
- easy_install Cython==0.9.6.11b
+ easy_install Cython==0.9.6.12
-lxml currently requires Cython 0.9.6.11b, later versions were not
-tested.
+lxml currently requires Cython 0.9.6.11b or 0.9.6.12, later versions
+were not tested.
Subversion
Modified: lxml/trunk/doc/pyrex.txt
==============================================================================
--- lxml/trunk/doc/pyrex.txt (original)
+++ lxml/trunk/doc/pyrex.txt Fri Feb 15 10:22:28 2008
@@ -22,4 +22,4 @@
clear description of what you did to run into the problems and provide the
compiler output that shows the error.
-.. Pyrex_: http://www.cosc.canterbury.ac.nz/~greg/python/Pyrex/
+.. _Pyrex: http://www.cosc.canterbury.ac.nz/~greg/python/Pyrex/
From scoder at codespeak.net Fri Feb 15 10:22:33 2008
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Fri, 15 Feb 2008 10:22:33 +0100 (CET)
Subject: [Lxml-checkins] r51509 - in lxml/trunk: . doc
Message-ID: <20080215092233.4A754168441@codespeak.net>
Author: scoder
Date: Fri Feb 15 10:22:32 2008
New Revision: 51509
Added:
lxml/trunk/doc/lxml-source-howto.txt
Modified:
lxml/trunk/ (props changed)
lxml/trunk/doc/mkhtml.py
Log:
r3516 at delle: sbehnel | 2008-02-15 10:21:52 +0100
initial document: starting to work on the source code
Added: lxml/trunk/doc/lxml-source-howto.txt
==============================================================================
--- (empty file)
+++ lxml/trunk/doc/lxml-source-howto.txt Fri Feb 15 10:22:32 2008
@@ -0,0 +1,174 @@
+==============================
+How to read the source of lxml
+==============================
+
+:Author:
+ Stefan Behnel
+
+.. meta::
+ :description: How to read and work on the source code of lxml
+ :keywords: lxml, XML, Cython, source code, develop, comprehend, understand
+
+This document describes how to read the source code of lxml_ and how
+to start working on it. You might also be interested in the companion
+document that describes `how to build lxml from sources`_.
+
+.. _lxml: http://codespeak.net/lxml
+.. _`how to build lxml from sources`: build.html
+
+.. contents::
+..
+
+
+What is Cython?
+===============
+
+.. _Cython: http://cython.org/
+.. _Pyrex: http://www.cosc.canterbury.ac.nz/~greg/python/Pyrex/
+
+Cython_ is the language that lxml is written in. It is a very
+Python-like language that was specifically designed for writing Python
+extension modules. The language is so close to Python that the Cython
+compiler can actually compile many, many Python programs to C without
+major modifications. But the real speed gains of a C compilation come
+from type annotations that were added to the language and that allow
+Cython to generate very efficient C code.
+
+The reason why Cython (or actually its predecessor Pyrex_ at the time)
+was chosen as an implementation language for lxml, is that it makes it
+very easy to interface with both the Python world and external C code.
+Cython generates all the necessary glue code for the Python API,
+including Python types and reference counting for Python objects.
+Calling into C code is not more than declaring the signature of the
+function and maybe some variables as being C types, pointers or
+structs, and then calling it. The rest of the code is just plain
+Python code.
+
+
+Where to start?
+===============
+
+First of all, read `how to build lxml from sources`_ to learn how to
+retrieve the source code from the Subversion repository and how to set
+up a build environment. The source code lives in the subdirectory
+``src`` of the checkout. The documentation (which is written in the
+`ReStructured Text`_ format) lives in the ``doc`` directory.
+
+.. _`ReStructured Text`:
+
+The main extension modules in lxml are ``lxml.etree`` and
+``lxml.objectify``. All main modules have the file extension
+``.pyx``, which shows the descendence from Pyrex. As usual in Python,
+the main files start with a short description and a couple of imports.
+Cython distinguishes between the run-time ``import`` statement (as
+known from Python) and the compile-time ``cimport`` statement, which
+imports C declarations, either from external libraries or from other
+Cython modules.
+
+
+Concepts
+--------
+
+* proxies
+* naming conventions
+*
+
+
+lxml.etree
+----------
+
+The main module, ``lxml.etree``, is in the file ``lxml.etree.pyx``.
+It implements the main functions and types of the ElementTree API, and
+is therefore the best place to start if you want to find out how a
+specific feature is implemented.
+
+At the very end of the file, it contains a series of ``include``
+statements that merge the rest of the implementation into the
+generated C code. Yes, you read right: no importing, no source file
+namespacing, just plain good old include and a huge C code result that
+we throw right into the C compiler.
+
+The main files are:
+
+proxy.pxi:
+
+ Very low-level functions for memory allocation/deallocation
+ and Element proxy handling. Ignoring this for the beginning
+ will keep your head from exploding.
+
+apihelpers.pxi:
+
+ Private C helper functions. Most of the little functions that are
+ used all over the place are defined here. This includes things
+ like reading out the text content of a libxml2 tree node, checking
+ input from the API level, creating a new Element node or handling
+ attribute values. If you want to work on the lxml code, you
+ should keep these functions in the back of your head, as they will
+ definitely make your life easier.
+
+xmlerror.pxi:
+
+ Error log handling. All error messages that libxml2 generates
+ internally walk through the code in this file to end up in lxml's
+ Python level error logs.
+
+ At the end of the file, you will find a long list of named error
+ codes. It is generated from the libxml2 HTML documentation (using
+ lxml, of course). See the script ``update-error-constants.py``
+ for this.
+
+classlookup.pxi:
+
+ Element class lookup mechanisms. The main API and engines for
+ those who want to define custom Element classes and inject them
+ into lxml.
+
+nsclasses.pxi:
+
+ Namespace implementation and registry. The registry and engine
+ for Element classes that use the ElementNamespaceClassLookup
+ scheme.
+
+docloader.pxi:
+
+ Support for custom document loaders. Base class and registry for
+ custom document resolvers.
+
+parser.pxi:
+
+ Parsers for XML and HTML. This is the main parser engine. It's
+ the reason why you can parse a document from various sources in
+ two lines of Python code. It's definitely not the right place to
+ start reading lxml's source code.
+
+parsertarget.pxi:
+
+ ET Parser target.
+
+serializer.pxi:
+
+ XML output functions
+
+iterparse.pxi:
+
+ incremental XML parsing
+
+xmlid.pxi:
+
+ XMLID and IDDict
+
+xinclude.pxi:
+
+ XInclude
+
+extensions.pxi:
+
+ XPath/XSLT extension functions
+
+xpath.pxi:
+
+ XPath evaluation
+
+xslt.pxi:
+
+ XSL transformations
Modified: lxml/trunk/doc/mkhtml.py
==============================================================================
--- lxml/trunk/doc/mkhtml.py (original)
+++ lxml/trunk/doc/mkhtml.py Fri Feb 15 10:22:32 2008
@@ -3,13 +3,14 @@
SITE_STRUCTURE = [
('lxml', ('main.txt', 'intro.txt', 'lxml2.txt', 'FAQ.txt',
- 'compatibility.txt', 'performance.txt', 'build.txt')),
+ 'compatibility.txt', 'performance.txt')),
('Developing with lxml', ('tutorial.txt', 'api.txt', 'parsing.txt',
'validation.txt', 'xpathxslt.txt',
'objectify.txt', 'lxmlhtml.txt',
'cssselect.txt', 'elementsoup.txt')),
('Extending lxml', ('resolvers.txt', 'extensions.txt',
'element_classes.txt', 'sax.txt', 'capi.txt')),
+ ('Developing lxml', ('build.txt', 'lxml-source-howto.txt')),
]
RST2HTML_OPTIONS = " ".join([
From scoder at codespeak.net Fri Feb 15 10:35:51 2008
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Fri, 15 Feb 2008 10:35:51 +0100 (CET)
Subject: [Lxml-checkins] r51510 - in lxml/trunk: . doc
Message-ID: <20080215093551.0F48B168471@codespeak.net>
Author: scoder
Date: Fri Feb 15 10:35:49 2008
New Revision: 51510
Modified:
lxml/trunk/ (props changed)
lxml/trunk/doc/lxml-source-howto.txt
Log:
r3519 at delle: sbehnel | 2008-02-15 10:35:15 +0100
doc structure
Modified: lxml/trunk/doc/lxml-source-howto.txt
==============================================================================
--- lxml/trunk/doc/lxml-source-howto.txt (original)
+++ lxml/trunk/doc/lxml-source-howto.txt Fri Feb 15 10:35:49 2008
@@ -67,7 +67,7 @@
Concepts
---------
+========
* proxies
* naming conventions
@@ -75,7 +75,7 @@
lxml.etree
-----------
+==========
The main module, ``lxml.etree``, is in the file ``lxml.etree.pyx``.
It implements the main functions and types of the ElementTree API, and
From scoder at codespeak.net Fri Feb 15 10:39:38 2008
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Fri, 15 Feb 2008 10:39:38 +0100 (CET)
Subject: [Lxml-checkins] r51512 - in lxml/trunk: . doc
Message-ID: <20080215093938.407D0168471@codespeak.net>
Author: scoder
Date: Fri Feb 15 10:39:37 2008
New Revision: 51512
Modified:
lxml/trunk/ (props changed)
lxml/trunk/doc/lxml-source-howto.txt
Log:
r3521 at delle: sbehnel | 2008-02-15 10:39:01 +0100
doc section on documentation
Modified: lxml/trunk/doc/lxml-source-howto.txt
==============================================================================
--- lxml/trunk/doc/lxml-source-howto.txt (original)
+++ lxml/trunk/doc/lxml-source-howto.txt Fri Feb 15 10:39:37 2008
@@ -15,6 +15,7 @@
.. _lxml: http://codespeak.net/lxml
.. _`how to build lxml from sources`: build.html
+.. _`ReStructured Text`: http://docutils.sourceforge.net/rst.html
.. contents::
..
@@ -54,8 +55,6 @@
``src`` of the checkout. The documentation (which is written in the
`ReStructured Text`_ format) lives in the ``doc`` directory.
-.. _`ReStructured Text`:
-
The main extension modules in lxml are ``lxml.etree`` and
``lxml.objectify``. All main modules have the file extension
``.pyx``, which shows the descendence from Pyrex. As usual in Python,
@@ -71,7 +70,16 @@
* proxies
* naming conventions
-*
+* ...
+
+
+The documentation
+=================
+
+* docs in ``doc`` directory
+* `ReStructured Text`_ format
+* generated through ``mkhtml.py`` script
+* ...
lxml.etree
From lxml-checkins at codespeak.net Fri Feb 15 15:14:05 2008
From: lxml-checkins at codespeak.net (lxml-checkins at codespeak.net)
Date: Fri, 15 Feb 2008 15:14:05 +0100 (CET)
Subject: [Lxml-checkins] February 76% OFF
Message-ID: <20080215161343.23936.qmail@pmsn.179.79.124.92.sable.dsl.krasnet.ru>
An HTML attachment was scrubbed...
URL: http://codespeak.net/pipermail/lxml-checkins/attachments/20080215/c0d4b6b5/attachment.htm
From scoder at codespeak.net Fri Feb 15 15:15:14 2008
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Fri, 15 Feb 2008 15:15:14 +0100 (CET)
Subject: [Lxml-checkins] r51537 - in lxml/trunk: . src/lxml
Message-ID: <20080215141514.ED4B416847F@codespeak.net>
Author: scoder
Date: Fri Feb 15 15:15:14 2008
New Revision: 51537
Modified:
lxml/trunk/ (props changed)
lxml/trunk/src/lxml/xmlerror.pxd
lxml/trunk/src/lxml/xmlerror.pxi
lxml/trunk/update-error-constants.py
Log:
r3524 at delle: sbehnel | 2008-02-15 13:39:53 +0100
integrate all error type enums: parser, XPath, schema, relaxng
Modified: lxml/trunk/src/lxml/xmlerror.pxd
==============================================================================
--- lxml/trunk/src/lxml/xmlerror.pxd (original)
+++ lxml/trunk/src/lxml/xmlerror.pxd Fri Feb 15 15:15:14 2008
@@ -767,6 +767,101 @@
XML_I18N_NO_OUTPUT = 6004 # 6004
XML_CHECK_ = 6005 # 5033
XML_CHECK_X = 6006 # 503
+
+ ctypedef enum xmlXPathError:
+ XPATH_EXPRESSION_OK = 0
+ XPATH_NUMBER_ERROR = 1
+ XPATH_UNFINISHED_LITERAL_ERROR = 2
+ XPATH_START_LITERAL_ERROR = 3
+ XPATH_VARIABLE_REF_ERROR = 4
+ XPATH_UNDEF_VARIABLE_ERROR = 5
+ XPATH_INVALID_PREDICATE_ERROR = 6
+ XPATH_EXPR_ERROR = 7
+ XPATH_UNCLOSED_ERROR = 8
+ XPATH_UNKNOWN_FUNC_ERROR = 9
+ XPATH_INVALID_OPERAND = 10
+ XPATH_INVALID_TYPE = 11
+ XPATH_INVALID_ARITY = 12
+ XPATH_INVALID_CTXT_SIZE = 13
+ XPATH_INVALID_CTXT_POSITION = 14
+ XPATH_MEMORY_ERROR = 15
+ XPTR_SYNTAX_ERROR = 16
+ XPTR_RESOURCE_ERROR = 17
+ XPTR_SUB_RESOURCE_ERROR = 18
+ XPATH_UNDEF_PREFIX_ERROR = 19
+ XPATH_ENCODING_ERROR = 20
+ XPATH_INVALID_CHAR_ERROR = 21
+ XPATH_INVALID_CTXT = 22
+
+ ctypedef enum xmlSchemaValidError:
+ XML_SCHEMAS_ERR_OK = 0
+ XML_SCHEMAS_ERR_NOROOT = 1
+ XML_SCHEMAS_ERR_UNDECLAREDELEM = 2
+ XML_SCHEMAS_ERR_NOTTOPLEVEL = 3
+ XML_SCHEMAS_ERR_MISSING = 4
+ XML_SCHEMAS_ERR_WRONGELEM = 5
+ XML_SCHEMAS_ERR_NOTYPE = 6
+ XML_SCHEMAS_ERR_NOROLLBACK = 7
+ XML_SCHEMAS_ERR_ISABSTRACT = 8
+ XML_SCHEMAS_ERR_NOTEMPTY = 9
+ XML_SCHEMAS_ERR_ELEMCONT = 10
+ XML_SCHEMAS_ERR_HAVEDEFAULT = 11
+ XML_SCHEMAS_ERR_NOTNILLABLE = 12
+ XML_SCHEMAS_ERR_EXTRACONTENT = 13
+ XML_SCHEMAS_ERR_INVALIDATTR = 14
+ XML_SCHEMAS_ERR_INVALIDELEM = 15
+ XML_SCHEMAS_ERR_NOTDETERMINIST = 16
+ XML_SCHEMAS_ERR_CONSTRUCT = 17
+ XML_SCHEMAS_ERR_INTERNAL = 18
+ XML_SCHEMAS_ERR_NOTSIMPLE = 19
+ XML_SCHEMAS_ERR_ATTRUNKNOWN = 20
+ XML_SCHEMAS_ERR_ATTRINVALID = 21
+ XML_SCHEMAS_ERR_VALUE = 22
+ XML_SCHEMAS_ERR_FACET = 23
+ XML_SCHEMAS_ERR_ = 24
+ XML_SCHEMAS_ERR_XXX = 25
+
+ ctypedef enum xmlRelaxNGValidErr:
+ XML_RELAXNG_OK = 0
+ XML_RELAXNG_ERR_MEMORY = 1
+ XML_RELAXNG_ERR_TYPE = 2
+ XML_RELAXNG_ERR_TYPEVAL = 3
+ XML_RELAXNG_ERR_DUPID = 4
+ XML_RELAXNG_ERR_TYPECMP = 5
+ XML_RELAXNG_ERR_NOSTATE = 6
+ XML_RELAXNG_ERR_NODEFINE = 7
+ XML_RELAXNG_ERR_LISTEXTRA = 8
+ XML_RELAXNG_ERR_LISTEMPTY = 9
+ XML_RELAXNG_ERR_INTERNODATA = 10
+ XML_RELAXNG_ERR_INTERSEQ = 11
+ XML_RELAXNG_ERR_INTEREXTRA = 12
+ XML_RELAXNG_ERR_ELEMNAME = 13
+ XML_RELAXNG_ERR_ATTRNAME = 14
+ XML_RELAXNG_ERR_ELEMNONS = 15
+ XML_RELAXNG_ERR_ATTRNONS = 16
+ XML_RELAXNG_ERR_ELEMWRONGNS = 17
+ XML_RELAXNG_ERR_ATTRWRONGNS = 18
+ XML_RELAXNG_ERR_ELEMEXTRANS = 19
+ XML_RELAXNG_ERR_ATTREXTRANS = 20
+ XML_RELAXNG_ERR_ELEMNOTEMPTY = 21
+ XML_RELAXNG_ERR_NOELEM = 22
+ XML_RELAXNG_ERR_NOTELEM = 23
+ XML_RELAXNG_ERR_ATTRVALID = 24
+ XML_RELAXNG_ERR_CONTENTVALID = 25
+ XML_RELAXNG_ERR_EXTRACONTENT = 26
+ XML_RELAXNG_ERR_INVALIDATTR = 27
+ XML_RELAXNG_ERR_DATAELEM = 28
+ XML_RELAXNG_ERR_VALELEM = 29
+ XML_RELAXNG_ERR_LISTELEM = 30
+ XML_RELAXNG_ERR_DATATYPE = 31
+ XML_RELAXNG_ERR_VALUE = 32
+ XML_RELAXNG_ERR_LIST = 33
+ XML_RELAXNG_ERR_NOGRAMMAR = 34
+ XML_RELAXNG_ERR_EXTRADATA = 35
+ XML_RELAXNG_ERR_LACKDATA = 36
+ XML_RELAXNG_ERR_INTERNAL = 37
+ XML_RELAXNG_ERR_ELEMWRONG = 38
+ XML_RELAXNG_ERR_TEXTWRONG = 39
# --- END: GENERATED CONSTANTS ---
cdef extern from "libxml/xmlerror.h":
Modified: lxml/trunk/src/lxml/xmlerror.pxi
==============================================================================
--- lxml/trunk/src/lxml/xmlerror.pxi (original)
+++ lxml/trunk/src/lxml/xmlerror.pxi Fri Feb 15 15:15:14 2008
@@ -523,9 +523,12 @@
cdef void __initErrorConstants():
"Called at setup time to parse the constants and build the classes below."
find_constants = re.compile(r"\s*([a-zA-Z0-9_]+)\s*=\s*([0-9]+)").findall
- const_defs = ((ErrorLevels, __ERROR_LEVELS),
- (ErrorDomains, __ERROR_DOMAINS),
- (ErrorTypes, __ERROR_TYPES))
+ const_defs = ((ErrorLevels, __ERROR_LEVELS),
+ (ErrorDomains, __ERROR_DOMAINS),
+ (ErrorTypes, __PARSER_ERROR_TYPES),
+ (XPathErrorTypes, __XPATH_ERROR_TYPES),
+ (XMLSchemaErrorTypes, __XMLSCHEMA_ERROR_TYPES),
+ (RelaxNGErrorTypes, __RELAXNG_ERROR_TYPES))
for cls, constant_tuple in const_defs:
reverse_dict = {}
cls._names = reverse_dict
@@ -546,6 +549,15 @@
class ErrorTypes:
"Libxml2 error types"
+class XPathErrorTypes:
+ "Libxml2 XPath error types"
+
+class XMLSchemaErrorTypes:
+ "Libxml2 XML Schema error types"
+
+class RelaxNGErrorTypes:
+ "Libxml2 RelaxNG error types"
+
# --- BEGIN: GENERATED CONSTANTS ---
# This section is generated by the script 'update-error-constants.py'.
@@ -596,8 +608,8 @@
I18N=27
""",)
-cdef object __ERROR_TYPES
-__ERROR_TYPES = ("""\
+cdef object __PARSER_ERROR_TYPES
+__PARSER_ERROR_TYPES = ("""\
ERR_OK=0
ERR_INTERNAL_ERROR=1
ERR_NO_MEMORY=2
@@ -1343,6 +1355,107 @@
CHECK_=6005
CHECK_X=6006
""",)
+
+cdef object __XPATH_ERROR_TYPES
+__XPATH_ERROR_TYPES = ("""\
+XPATH_EXPRESSION_OK=0
+XPATH_NUMBER_ERROR=1
+XPATH_UNFINISHED_LITERAL_ERROR=2
+XPATH_START_LITERAL_ERROR=3
+XPATH_VARIABLE_REF_ERROR=4
+XPATH_UNDEF_VARIABLE_ERROR=5
+XPATH_INVALID_PREDICATE_ERROR=6
+XPATH_EXPR_ERROR=7
+XPATH_UNCLOSED_ERROR=8
+XPATH_UNKNOWN_FUNC_ERROR=9
+XPATH_INVALID_OPERAND=10
+XPATH_INVALID_TYPE=11
+XPATH_INVALID_ARITY=12
+XPATH_INVALID_CTXT_SIZE=13
+XPATH_INVALID_CTXT_POSITION=14
+XPATH_MEMORY_ERROR=15
+XPTR_SYNTAX_ERROR=16
+XPTR_RESOURCE_ERROR=17
+XPTR_SUB_RESOURCE_ERROR=18
+XPATH_UNDEF_PREFIX_ERROR=19
+XPATH_ENCODING_ERROR=20
+XPATH_INVALID_CHAR_ERROR=21
+XPATH_INVALID_CTXT=22
+""",)
+
+cdef object __XMLSCHEMA_ERROR_TYPES
+__XMLSCHEMA_ERROR_TYPES = ("""\
+SCHEMAS_ERR_OK=0
+SCHEMAS_ERR_NOROOT=1
+SCHEMAS_ERR_UNDECLAREDELEM=2
+SCHEMAS_ERR_NOTTOPLEVEL=3
+SCHEMAS_ERR_MISSING=4
+SCHEMAS_ERR_WRONGELEM=5
+SCHEMAS_ERR_NOTYPE=6
+SCHEMAS_ERR_NOROLLBACK=7
+SCHEMAS_ERR_ISABSTRACT=8
+SCHEMAS_ERR_NOTEMPTY=9
+SCHEMAS_ERR_ELEMCONT=10
+SCHEMAS_ERR_HAVEDEFAULT=11
+SCHEMAS_ERR_NOTNILLABLE=12
+SCHEMAS_ERR_EXTRACONTENT=13
+SCHEMAS_ERR_INVALIDATTR=14
+SCHEMAS_ERR_INVALIDELEM=15
+SCHEMAS_ERR_NOTDETERMINIST=16
+SCHEMAS_ERR_CONSTRUCT=17
+SCHEMAS_ERR_INTERNAL=18
+SCHEMAS_ERR_NOTSIMPLE=19
+SCHEMAS_ERR_ATTRUNKNOWN=20
+SCHEMAS_ERR_ATTRINVALID=21
+SCHEMAS_ERR_VALUE=22
+SCHEMAS_ERR_FACET=23
+SCHEMAS_ERR_=24
+SCHEMAS_ERR_XXX=25
+""",)
+
+cdef object __RELAXNG_ERROR_TYPES
+__RELAXNG_ERROR_TYPES = ("""\
+RELAXNG_OK=0
+RELAXNG_ERR_MEMORY=1
+RELAXNG_ERR_TYPE=2
+RELAXNG_ERR_TYPEVAL=3
+RELAXNG_ERR_DUPID=4
+RELAXNG_ERR_TYPECMP=5
+RELAXNG_ERR_NOSTATE=6
+RELAXNG_ERR_NODEFINE=7
+RELAXNG_ERR_LISTEXTRA=8
+RELAXNG_ERR_LISTEMPTY=9
+RELAXNG_ERR_INTERNODATA=10
+RELAXNG_ERR_INTERSEQ=11
+RELAXNG_ERR_INTEREXTRA=12
+RELAXNG_ERR_ELEMNAME=13
+RELAXNG_ERR_ATTRNAME=14
+RELAXNG_ERR_ELEMNONS=15
+RELAXNG_ERR_ATTRNONS=16
+RELAXNG_ERR_ELEMWRONGNS=17
+RELAXNG_ERR_ATTRWRONGNS=18
+RELAXNG_ERR_ELEMEXTRANS=19
+RELAXNG_ERR_ATTREXTRANS=20
+RELAXNG_ERR_ELEMNOTEMPTY=21
+RELAXNG_ERR_NOELEM=22
+RELAXNG_ERR_NOTELEM=23
+RELAXNG_ERR_ATTRVALID=24
+RELAXNG_ERR_CONTENTVALID=25
+RELAXNG_ERR_EXTRACONTENT=26
+RELAXNG_ERR_INVALIDATTR=27
+RELAXNG_ERR_DATAELEM=28
+RELAXNG_ERR_VALELEM=29
+RELAXNG_ERR_LISTELEM=30
+RELAXNG_ERR_DATATYPE=31
+RELAXNG_ERR_VALUE=32
+RELAXNG_ERR_LIST=33
+RELAXNG_ERR_NOGRAMMAR=34
+RELAXNG_ERR_EXTRADATA=35
+RELAXNG_ERR_LACKDATA=36
+RELAXNG_ERR_INTERNAL=37
+RELAXNG_ERR_ELEMWRONG=38
+RELAXNG_ERR_TEXTWRONG=39
+""",)
# --- END: GENERATED CONSTANTS ---
__initErrorConstants()
Modified: lxml/trunk/update-error-constants.py
==============================================================================
--- lxml/trunk/update-error-constants.py (original)
+++ lxml/trunk/update-error-constants.py Fri Feb 15 15:15:14 2008
@@ -11,20 +11,24 @@
print sys.argv[0], "/path/to/libxml2-doc-dir"
sys.exit(len(sys.argv) > 1)
-HTML_FILE = os.path.join(sys.argv[1], 'html', 'libxml-xmlerror.html')
-os.stat(HTML_FILE) # raise an error if we can't find it
+HTML_DIR = os.path.join(sys.argv[1], 'html')
+os.stat(HTML_DIR) # raise an error if we can't find it
sys.path.insert(0, 'src')
from lxml import etree
# map enum name to Python variable name and alignment for constant name
ENUM_MAP = {
- 'xmlErrorLevel' : ('__ERROR_LEVELS', 'XML_ERR_'),
- 'xmlErrorDomain' : ('__ERROR_DOMAINS', 'XML_FROM_'),
- 'xmlParserErrors' : ('__ERROR_TYPES', 'XML_')
+ 'xmlErrorLevel' : ('__ERROR_LEVELS', 'XML_ERR_'),
+ 'xmlErrorDomain' : ('__ERROR_DOMAINS', 'XML_FROM_'),
+ 'xmlParserErrors' : ('__PARSER_ERROR_TYPES', 'XML_'),
+ 'xmlXPathError' : ('__XPATH_ERROR_TYPES', ''),
+ 'xmlSchemaValidError' : ('__XMLSCHEMA_ERROR_TYPES', 'XML_'),
+ 'xmlRelaxNGValidErr' : ('__RELAXNG_ERROR_TYPES', 'XML_'),
}
-ENUM_ORDER = ('xmlErrorLevel', 'xmlErrorDomain', 'xmlParserErrors')
+ENUM_ORDER = ('xmlErrorLevel', 'xmlErrorDomain', 'xmlParserErrors',
+ 'xmlXPathError', 'xmlSchemaValidError', 'xmlRelaxNGValidErr')
COMMENT = """
# This section is generated by the script '%s'.
@@ -61,27 +65,40 @@
f.write(''.join(post))
f.close()
-def parse_enums(html_file):
+def parse_enums(html_dir, html_filename, enum_dict):
PARSE_ENUM_NAME = re.compile('\s*enum\s+(\w+)\s*{', re.I).match
PARSE_ENUM_VALUE = re.compile('\s*=\s+([0-9]+)\s*(?::\s*(.*))?').match
- tree = etree.parse(html_file)
+ tree = etree.parse(os.path.join(html_dir, html_filename))
xpath = etree.XPathEvaluator(
tree, namespaces={'html' : 'http://www.w3.org/1999/xhtml'})
+ collect_text = etree.XPath("string()")
- enum_dict = {}
- enums = xpath.evaluate("//html:pre[@class = 'programlisting' and contains(text(), 'Enum') and html:a[@name]]")
+ enums = xpath.evaluate("//html:pre[@class = 'programlisting' and contains(text(), 'Enum')]")
for enum in enums:
- enum_name = PARSE_ENUM_NAME(enum.text).group(1)
+ enum_name = PARSE_ENUM_NAME(collect_text(enum))
+ if not enum_name or enum_name not in ENUM_MAP:
+ continue
+ enum_name = enum_name.group(1)
print "Found enum", enum_name
entries = []
- enum_dict[enum_name] = entries
for child in enum:
name = child.text
- value, descr = PARSE_ENUM_VALUE(child.tail).groups()
+ match = PARSE_ENUM_VALUE(child.tail)
+ if not match:
+ print("Ignoring enum %s (failed to parse field '%s')" % (
+ enum_name, name))
+ break
+ value, descr = match.groups()
entries.append((name, int(value), descr))
+ else:
+ enum_dict[enum_name] = entries
return enum_dict
-enum_dict = parse_enums(HTML_FILE)
+enum_dict = {}
+parse_enums(HTML_DIR, 'libxml-xmlerror.html', enum_dict)
+parse_enums(HTML_DIR, 'libxml-xpath.html', enum_dict)
+parse_enums(HTML_DIR, 'libxml-xmlschemas.html', enum_dict)
+parse_enums(HTML_DIR, 'libxml-relaxng.html', enum_dict)
# regenerate source files
pxi_result = []
From scoder at codespeak.net Fri Feb 15 15:15:15 2008
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Fri, 15 Feb 2008 15:15:15 +0100 (CET)
Subject: [Lxml-checkins] r51536 - in lxml/trunk: . doc
Message-ID: <20080215141515.5EA26168487@codespeak.net>
Author: scoder
Date: Fri Feb 15 15:15:09 2008
New Revision: 51536
Modified:
lxml/trunk/ (props changed)
lxml/trunk/doc/lxml-source-howto.txt
Log:
r3523 at delle: sbehnel | 2008-02-15 13:28:56 +0100
doc structure
Modified: lxml/trunk/doc/lxml-source-howto.txt
==============================================================================
--- lxml/trunk/doc/lxml-source-howto.txt (original)
+++ lxml/trunk/doc/lxml-source-howto.txt Fri Feb 15 15:15:09 2008
@@ -66,7 +66,7 @@
Concepts
-========
+--------
* proxies
* naming conventions
@@ -74,7 +74,7 @@
The documentation
-=================
+-----------------
* docs in ``doc`` directory
* `ReStructured Text`_ format
From scoder at codespeak.net Fri Feb 15 15:15:18 2008
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Fri, 15 Feb 2008 15:15:18 +0100 (CET)
Subject: [Lxml-checkins] r51538 - lxml/trunk
Message-ID: <20080215141518.4F0261684C1@codespeak.net>
Author: scoder
Date: Fri Feb 15 15:15:17 2008
New Revision: 51538
Modified:
lxml/trunk/ (props changed)
lxml/trunk/update-error-constants.py
Log:
r3525 at delle: sbehnel | 2008-02-15 15:08:18 +0100
error type constants of XPath errors and XML Schema errors are not needed
Modified: lxml/trunk/update-error-constants.py
==============================================================================
--- lxml/trunk/update-error-constants.py (original)
+++ lxml/trunk/update-error-constants.py Fri Feb 15 15:15:17 2008
@@ -22,13 +22,18 @@
'xmlErrorLevel' : ('__ERROR_LEVELS', 'XML_ERR_'),
'xmlErrorDomain' : ('__ERROR_DOMAINS', 'XML_FROM_'),
'xmlParserErrors' : ('__PARSER_ERROR_TYPES', 'XML_'),
- 'xmlXPathError' : ('__XPATH_ERROR_TYPES', ''),
- 'xmlSchemaValidError' : ('__XMLSCHEMA_ERROR_TYPES', 'XML_'),
+# 'xmlXPathError' : ('__XPATH_ERROR_TYPES', ''),
+# 'xmlSchemaValidError' : ('__XMLSCHEMA_ERROR_TYPES', 'XML_'),
'xmlRelaxNGValidErr' : ('__RELAXNG_ERROR_TYPES', 'XML_'),
}
-ENUM_ORDER = ('xmlErrorLevel', 'xmlErrorDomain', 'xmlParserErrors',
- 'xmlXPathError', 'xmlSchemaValidError', 'xmlRelaxNGValidErr')
+ENUM_ORDER = (
+ 'xmlErrorLevel',
+ 'xmlErrorDomain',
+ 'xmlParserErrors',
+# 'xmlXPathError',
+# 'xmlSchemaValidError',
+ 'xmlRelaxNGValidErr')
COMMENT = """
# This section is generated by the script '%s'.
@@ -65,20 +70,23 @@
f.write(''.join(post))
f.close()
+collect_text = etree.XPath("string()")
+find_enums = etree.XPath(
+ "//html:pre[@class = 'programlisting' and contains(text(), 'Enum')]",
+ namespaces = {'html' : 'http://www.w3.org/1999/xhtml'})
+
def parse_enums(html_dir, html_filename, enum_dict):
PARSE_ENUM_NAME = re.compile('\s*enum\s+(\w+)\s*{', re.I).match
PARSE_ENUM_VALUE = re.compile('\s*=\s+([0-9]+)\s*(?::\s*(.*))?').match
tree = etree.parse(os.path.join(html_dir, html_filename))
- xpath = etree.XPathEvaluator(
- tree, namespaces={'html' : 'http://www.w3.org/1999/xhtml'})
- collect_text = etree.XPath("string()")
-
- enums = xpath.evaluate("//html:pre[@class = 'programlisting' and contains(text(), 'Enum')]")
+ enums = find_enums(tree)
for enum in enums:
enum_name = PARSE_ENUM_NAME(collect_text(enum))
- if not enum_name or enum_name not in ENUM_MAP:
+ if not enum_name:
continue
enum_name = enum_name.group(1)
+ if enum_name not in ENUM_MAP:
+ continue
print "Found enum", enum_name
entries = []
for child in enum:
@@ -96,8 +104,8 @@
enum_dict = {}
parse_enums(HTML_DIR, 'libxml-xmlerror.html', enum_dict)
-parse_enums(HTML_DIR, 'libxml-xpath.html', enum_dict)
-parse_enums(HTML_DIR, 'libxml-xmlschemas.html', enum_dict)
+#parse_enums(HTML_DIR, 'libxml-xpath.html', enum_dict)
+#parse_enums(HTML_DIR, 'libxml-xmlschemas.html', enum_dict)
parse_enums(HTML_DIR, 'libxml-relaxng.html', enum_dict)
# regenerate source files
From scoder at codespeak.net Fri Feb 15 15:15:22 2008
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Fri, 15 Feb 2008 15:15:22 +0100 (CET)
Subject: [Lxml-checkins] r51539 - in lxml/trunk: . doc src/lxml
Message-ID: <20080215141522.3397A1684C2@codespeak.net>
Author: scoder
Date: Fri Feb 15 15:15:21 2008
New Revision: 51539
Modified:
lxml/trunk/ (props changed)
lxml/trunk/CHANGES.txt
lxml/trunk/doc/validation.txt
lxml/trunk/src/lxml/xmlerror.pxd
lxml/trunk/src/lxml/xmlerror.pxi
Log:
r3526 at delle: sbehnel | 2008-02-15 15:12:34 +0100
error type constants of XPath errors and XML Schema errors are not needed
Modified: lxml/trunk/CHANGES.txt
==============================================================================
--- lxml/trunk/CHANGES.txt (original)
+++ lxml/trunk/CHANGES.txt Fri Feb 15 15:15:21 2008
@@ -11,6 +11,8 @@
Bugs fixed
----------
+* Error type names in RelaxNG were reported incorrectly.
+
* Slice deletion bug fixed in objectify.
Other changes
Modified: lxml/trunk/doc/validation.txt
==============================================================================
--- lxml/trunk/doc/validation.txt (original)
+++ lxml/trunk/doc/validation.txt Fri Feb 15 15:15:21 2008
@@ -182,14 +182,23 @@
>>> log = relaxng.error_log
>>> print log.last_error
- :1:0:ERROR:RELAXNGV:ERR_LT_IN_ATTRIBUTE: Did not expect element c there
+ :1:0:ERROR:RELAXNGV:RELAXNG_ERR_ELEMWRONG: Did not expect element c there
You can see that the error (ERROR) happened during RelaxNG validation
-(RELAXNGV). The message then tells you what went wrong. Note that this error
-log is local to the RelaxNG object. It will only contain log entries that
-appeared during the validation. The DocumentInvalid exception raised by the
-``assertValid`` method above provides access to the global error log (like all
-other lxml exceptions).
+(RELAXNGV). The message then tells you what went wrong. You can also
+look at the error domain and its type directly::
+
+ >>> error = log.last_error
+ >>> print error.domain_name
+ RELAXNGV
+ >>> print error.type_name
+ RELAXNG_ERR_ELEMWRONG
+
+Note that this error log is local to the RelaxNG object. It will only
+contain log entries that appeared during the validation. The
+DocumentInvalid exception raised by the ``assertValid`` method above
+provides access to the global error log (like all other lxml
+exceptions).
Similar to XSLT, there's also a less efficient but easier shortcut method to
do one-shot RelaxNG validation::
Modified: lxml/trunk/src/lxml/xmlerror.pxd
==============================================================================
--- lxml/trunk/src/lxml/xmlerror.pxd (original)
+++ lxml/trunk/src/lxml/xmlerror.pxd Fri Feb 15 15:15:21 2008
@@ -768,59 +768,6 @@
XML_CHECK_ = 6005 # 5033
XML_CHECK_X = 6006 # 503
- ctypedef enum xmlXPathError:
- XPATH_EXPRESSION_OK = 0
- XPATH_NUMBER_ERROR = 1
- XPATH_UNFINISHED_LITERAL_ERROR = 2
- XPATH_START_LITERAL_ERROR = 3
- XPATH_VARIABLE_REF_ERROR = 4
- XPATH_UNDEF_VARIABLE_ERROR = 5
- XPATH_INVALID_PREDICATE_ERROR = 6
- XPATH_EXPR_ERROR = 7
- XPATH_UNCLOSED_ERROR = 8
- XPATH_UNKNOWN_FUNC_ERROR = 9
- XPATH_INVALID_OPERAND = 10
- XPATH_INVALID_TYPE = 11
- XPATH_INVALID_ARITY = 12
- XPATH_INVALID_CTXT_SIZE = 13
- XPATH_INVALID_CTXT_POSITION = 14
- XPATH_MEMORY_ERROR = 15
- XPTR_SYNTAX_ERROR = 16
- XPTR_RESOURCE_ERROR = 17
- XPTR_SUB_RESOURCE_ERROR = 18
- XPATH_UNDEF_PREFIX_ERROR = 19
- XPATH_ENCODING_ERROR = 20
- XPATH_INVALID_CHAR_ERROR = 21
- XPATH_INVALID_CTXT = 22
-
- ctypedef enum xmlSchemaValidError:
- XML_SCHEMAS_ERR_OK = 0
- XML_SCHEMAS_ERR_NOROOT = 1
- XML_SCHEMAS_ERR_UNDECLAREDELEM = 2
- XML_SCHEMAS_ERR_NOTTOPLEVEL = 3
- XML_SCHEMAS_ERR_MISSING = 4
- XML_SCHEMAS_ERR_WRONGELEM = 5
- XML_SCHEMAS_ERR_NOTYPE = 6
- XML_SCHEMAS_ERR_NOROLLBACK = 7
- XML_SCHEMAS_ERR_ISABSTRACT = 8
- XML_SCHEMAS_ERR_NOTEMPTY = 9
- XML_SCHEMAS_ERR_ELEMCONT = 10
- XML_SCHEMAS_ERR_HAVEDEFAULT = 11
- XML_SCHEMAS_ERR_NOTNILLABLE = 12
- XML_SCHEMAS_ERR_EXTRACONTENT = 13
- XML_SCHEMAS_ERR_INVALIDATTR = 14
- XML_SCHEMAS_ERR_INVALIDELEM = 15
- XML_SCHEMAS_ERR_NOTDETERMINIST = 16
- XML_SCHEMAS_ERR_CONSTRUCT = 17
- XML_SCHEMAS_ERR_INTERNAL = 18
- XML_SCHEMAS_ERR_NOTSIMPLE = 19
- XML_SCHEMAS_ERR_ATTRUNKNOWN = 20
- XML_SCHEMAS_ERR_ATTRINVALID = 21
- XML_SCHEMAS_ERR_VALUE = 22
- XML_SCHEMAS_ERR_FACET = 23
- XML_SCHEMAS_ERR_ = 24
- XML_SCHEMAS_ERR_XXX = 25
-
ctypedef enum xmlRelaxNGValidErr:
XML_RELAXNG_OK = 0
XML_RELAXNG_ERR_MEMORY = 1
Modified: lxml/trunk/src/lxml/xmlerror.pxi
==============================================================================
--- lxml/trunk/src/lxml/xmlerror.pxi (original)
+++ lxml/trunk/src/lxml/xmlerror.pxi Fri Feb 15 15:15:21 2008
@@ -88,7 +88,11 @@
property type_name:
def __get__(self):
- return ErrorTypes._getName(self.type, "unknown")
+ if self.domain == ErrorDomains.RELAXNGV:
+ getName = RelaxNGErrorTypes._getName
+ else:
+ getName = ErrorTypes._getName
+ return getName(self.type, "unknown")
property level_name:
def __get__(self):
@@ -526,8 +530,6 @@
const_defs = ((ErrorLevels, __ERROR_LEVELS),
(ErrorDomains, __ERROR_DOMAINS),
(ErrorTypes, __PARSER_ERROR_TYPES),
- (XPathErrorTypes, __XPATH_ERROR_TYPES),
- (XMLSchemaErrorTypes, __XMLSCHEMA_ERROR_TYPES),
(RelaxNGErrorTypes, __RELAXNG_ERROR_TYPES))
for cls, constant_tuple in const_defs:
reverse_dict = {}
@@ -540,6 +542,7 @@
python.PyObject_SetAttr(cls, name, value)
python.PyDict_SetItem(reverse_dict, value, name)
+
class ErrorLevels:
"Libxml2 error levels"
@@ -549,12 +552,6 @@
class ErrorTypes:
"Libxml2 error types"
-class XPathErrorTypes:
- "Libxml2 XPath error types"
-
-class XMLSchemaErrorTypes:
- "Libxml2 XML Schema error types"
-
class RelaxNGErrorTypes:
"Libxml2 RelaxNG error types"
@@ -1356,63 +1353,6 @@
CHECK_X=6006
""",)
-cdef object __XPATH_ERROR_TYPES
-__XPATH_ERROR_TYPES = ("""\
-XPATH_EXPRESSION_OK=0
-XPATH_NUMBER_ERROR=1
-XPATH_UNFINISHED_LITERAL_ERROR=2
-XPATH_START_LITERAL_ERROR=3
-XPATH_VARIABLE_REF_ERROR=4
-XPATH_UNDEF_VARIABLE_ERROR=5
-XPATH_INVALID_PREDICATE_ERROR=6
-XPATH_EXPR_ERROR=7
-XPATH_UNCLOSED_ERROR=8
-XPATH_UNKNOWN_FUNC_ERROR=9
-XPATH_INVALID_OPERAND=10
-XPATH_INVALID_TYPE=11
-XPATH_INVALID_ARITY=12
-XPATH_INVALID_CTXT_SIZE=13
-XPATH_INVALID_CTXT_POSITION=14
-XPATH_MEMORY_ERROR=15
-XPTR_SYNTAX_ERROR=16
-XPTR_RESOURCE_ERROR=17
-XPTR_SUB_RESOURCE_ERROR=18
-XPATH_UNDEF_PREFIX_ERROR=19
-XPATH_ENCODING_ERROR=20
-XPATH_INVALID_CHAR_ERROR=21
-XPATH_INVALID_CTXT=22
-""",)
-
-cdef object __XMLSCHEMA_ERROR_TYPES
-__XMLSCHEMA_ERROR_TYPES = ("""\
-SCHEMAS_ERR_OK=0
-SCHEMAS_ERR_NOROOT=1
-SCHEMAS_ERR_UNDECLAREDELEM=2
-SCHEMAS_ERR_NOTTOPLEVEL=3
-SCHEMAS_ERR_MISSING=4
-SCHEMAS_ERR_WRONGELEM=5
-SCHEMAS_ERR_NOTYPE=6
-SCHEMAS_ERR_NOROLLBACK=7
-SCHEMAS_ERR_ISABSTRACT=8
-SCHEMAS_ERR_NOTEMPTY=9
-SCHEMAS_ERR_ELEMCONT=10
-SCHEMAS_ERR_HAVEDEFAULT=11
-SCHEMAS_ERR_NOTNILLABLE=12
-SCHEMAS_ERR_EXTRACONTENT=13
-SCHEMAS_ERR_INVALIDATTR=14
-SCHEMAS_ERR_INVALIDELEM=15
-SCHEMAS_ERR_NOTDETERMINIST=16
-SCHEMAS_ERR_CONSTRUCT=17
-SCHEMAS_ERR_INTERNAL=18
-SCHEMAS_ERR_NOTSIMPLE=19
-SCHEMAS_ERR_ATTRUNKNOWN=20
-SCHEMAS_ERR_ATTRINVALID=21
-SCHEMAS_ERR_VALUE=22
-SCHEMAS_ERR_FACET=23
-SCHEMAS_ERR_=24
-SCHEMAS_ERR_XXX=25
-""",)
-
cdef object __RELAXNG_ERROR_TYPES
__RELAXNG_ERROR_TYPES = ("""\
RELAXNG_OK=0
From scoder at codespeak.net Fri Feb 15 15:48:31 2008
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Fri, 15 Feb 2008 15:48:31 +0100 (CET)
Subject: [Lxml-checkins] r51541 - in lxml/trunk: . doc
Message-ID: <20080215144831.013BB16847B@codespeak.net>
Author: scoder
Date: Fri Feb 15 15:48:30 2008
New Revision: 51541
Modified:
lxml/trunk/ (props changed)
lxml/trunk/doc/lxml-source-howto.txt
Log:
r3531 at delle: sbehnel | 2008-02-15 15:47:53 +0100
docs
Modified: lxml/trunk/doc/lxml-source-howto.txt
==============================================================================
--- lxml/trunk/doc/lxml-source-howto.txt (original)
+++ lxml/trunk/doc/lxml-source-howto.txt Fri Feb 15 15:48:30 2008
@@ -45,15 +45,21 @@
structs, and then calling it. The rest of the code is just plain
Python code.
+Even if you are not familiar with Cython, you should keep in mind that
+a slow implementation of a feature is better than none. So, if you
+want to contribute and have an idea what code you want to write, feel
+free to start with a pure Python implementation. Chances are, if you
+get the change officially accepted and integrated, others will take
+the time to optimise it so that it runs fast in Cython.
+
Where to start?
===============
First of all, read `how to build lxml from sources` to learn how to
-retrieve the source code from the Subversion repository and how to set
-up a build environment. The source code lives in the subdirectory
-``src`` of the checkout. The documentation (which is written in the
-`ReStructured Text`_ format) lives in the ``doc`` directory.
+retrieve the source code from the Subversion repository and how to
+build it. The source code lives in the subdirectory ``src`` of the
+checkout.
The main extension modules in lxml are ``lxml.etree`` and
``lxml.objectify``. All main modules have the file extension
@@ -65,14 +71,6 @@
Cython modules.
-Concepts
---------
-
-* proxies
-* naming conventions
-* ...
-
-
The documentation
-----------------
@@ -82,10 +80,18 @@
* ...
+Concepts
+--------
+
+* proxies
+* naming conventions
+* ...
+
+
lxml.etree
==========
-The main module, ``lxml.etree``, is in the file ``lxml.etree.pyx``.
+The main module, ``lxml.etree``, is in the file **lxml.etree.pyx**.
It implements the main functions and types of the ElementTree API, and
is therefore the best place to start if you want to find out how a
specific feature is implemented.
@@ -93,10 +99,10 @@
At the very end of the file, it contains a series of ``include``
statements that merge the rest of the implementation into the
generated C code. Yes, you read right: no importing, no source file
-namespacing, just plain good old include and a huge C code result that
-we throw right into the C compiler.
+namespacing, just plain good old include and a huge C code result of
+more than 100,000 lines that we throw right into the C compiler.
-The main files are:
+The main include files are:
proxy.pxi:
@@ -155,28 +161,45 @@
serializer.pxi:
- XML output functions
+ XML output functions. Basically everything that creates byte
+ sequences from XML trees.
iterparse.pxi:
- incremental XML parsing
+ Incremental XML parsing. An iterator class that builds iterparse
+ events while parsing.
xmlid.pxi:
- XMLID and IDDict
+ XMLID and IDDict, a dictionary-like way to find Elements by their
+ XML-ID attribute.
xinclude.pxi:
- XInclude
+ XInclude implementation.
extensions.pxi:
- XPath/XSLT extension functions
+ Infrastructure for extension functions in XPath/XSLT, including
+ XPath value conversion and function registration.
xpath.pxi:
- XPath evaluation
+ XPath evaluators.
xslt.pxi:
- XSL transformations
+ XSL transformations, including the ``XSLT`` class, document lookup
+ handling and access control.
+
+
+lxml.objectify
+==============
+
+* ...
+
+
+lxml.html
+=========
+
+* ...
From scoder at codespeak.net Fri Feb 15 17:32:03 2008
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Fri, 15 Feb 2008 17:32:03 +0100 (CET)
Subject: [Lxml-checkins] r51544 - in lxml/branch/lxml-2.0: . doc src/lxml
Message-ID: <20080215163203.CB1F5168437@codespeak.net>
Author: scoder
Date: Fri Feb 15 17:32:03 2008
New Revision: 51544
Added:
lxml/branch/lxml-2.0/doc/lxml-source-howto.txt
- copied unchanged from r51543, lxml/trunk/doc/lxml-source-howto.txt
Modified:
lxml/branch/lxml-2.0/CHANGES.txt
lxml/branch/lxml-2.0/doc/build.txt
lxml/branch/lxml-2.0/doc/mkhtml.py
lxml/branch/lxml-2.0/doc/objectify.txt
lxml/branch/lxml-2.0/doc/pyrex.txt
lxml/branch/lxml-2.0/doc/validation.txt
lxml/branch/lxml-2.0/src/lxml/lxml.objectify.pyx
lxml/branch/lxml-2.0/src/lxml/xmlerror.pxd
lxml/branch/lxml-2.0/src/lxml/xmlerror.pxi
lxml/branch/lxml-2.0/update-error-constants.py
Log:
trunk merge
Modified: lxml/branch/lxml-2.0/CHANGES.txt
==============================================================================
--- lxml/branch/lxml-2.0/CHANGES.txt (original)
+++ lxml/branch/lxml-2.0/CHANGES.txt Fri Feb 15 17:32:03 2008
@@ -2,6 +2,23 @@
lxml changelog
==============
+2.0.2 (Under development)
+=========================
+
+Features added
+--------------
+
+Bugs fixed
+----------
+
+* Error type names in RelaxNG were reported incorrectly.
+
+* Slice deletion bug fixed in objectify.
+
+Other changes
+-------------
+
+
2.0.1 (2008-02-13)
==================
Modified: lxml/branch/lxml-2.0/doc/build.txt
==============================================================================
--- lxml/branch/lxml-2.0/doc/build.txt (original)
+++ lxml/branch/lxml-2.0/doc/build.txt Fri Feb 15 17:32:03 2008
@@ -44,10 +44,10 @@
want to be an lxml developer, then you do need a working Cython
installation. You can use EasyInstall_ to install it::
- easy_install Cython==0.9.6.11b
+ easy_install Cython==0.9.6.12
-lxml currently requires Cython 0.9.6.11b, later versions were not
-tested.
+lxml currently requires Cython 0.9.6.11b or 0.9.6.12, later versions
+were not tested.
Subversion
Modified: lxml/branch/lxml-2.0/doc/mkhtml.py
==============================================================================
--- lxml/branch/lxml-2.0/doc/mkhtml.py (original)
+++ lxml/branch/lxml-2.0/doc/mkhtml.py Fri Feb 15 17:32:03 2008
@@ -3,13 +3,14 @@
SITE_STRUCTURE = [
('lxml', ('main.txt', 'intro.txt', 'lxml2.txt', 'FAQ.txt',
- 'compatibility.txt', 'performance.txt', 'build.txt')),
+ 'compatibility.txt', 'performance.txt')),
('Developing with lxml', ('tutorial.txt', 'api.txt', 'parsing.txt',
'validation.txt', 'xpathxslt.txt',
'objectify.txt', 'lxmlhtml.txt',
'cssselect.txt', 'elementsoup.txt')),
('Extending lxml', ('resolvers.txt', 'extensions.txt',
'element_classes.txt', 'sax.txt', 'capi.txt')),
+ ('Developing lxml', ('build.txt', 'lxml-source-howto.txt')),
]
RST2HTML_OPTIONS = " ".join([
Modified: lxml/branch/lxml-2.0/doc/objectify.txt
==============================================================================
--- lxml/branch/lxml-2.0/doc/objectify.txt (original)
+++ lxml/branch/lxml-2.0/doc/objectify.txt Fri Feb 15 17:32:03 2008
@@ -16,31 +16,6 @@
used. Python data types are extracted from XML content automatically and made
available to the normal Python operators.
-To set up and use ``objectify``, you need both the ``lxml.etree`` module and
-``lxml.objectify``::
-
- >>> from lxml import etree
- >>> from lxml import objectify
-
-The objectify API is very different from the ElementTree API. If it
-is used, it should not be mixed with other element implementations
-(such as trees parsed with ``lxml.etree``), to avoid non-obvious
-behaviour.
-
-The `benchmark page`_ has some hints on performance optimisation of code using
-lxml.objectify.
-
-To make the doctests in this document look a little nicer, we also use this:
-
- >>> import lxml.usedoctest
-
-Imported from within a doctest, this relieves us from caring about the exact
-formatting of XML output.
-
-.. _Amara: http://uche.ogbuji.net/tech/4suite/amara/
-.. _gnosis.xml.objectify: http://gnosis.cx/download/
-.. _`benchmark page`: performance.html#lxml-objectify
-
.. contents::
..
1 The lxml.objectify API
@@ -61,6 +36,33 @@
5.5 Advanced element class lookup
6 What is different from lxml.etree?
+.. _Amara: http://uche.ogbuji.net/tech/4suite/amara/
+.. _gnosis.xml.objectify: http://gnosis.cx/download/
+.. _`benchmark page`: performance.html#lxml-objectify
+
+
+To set up and use ``objectify``, you need both the ``lxml.etree``
+module and ``lxml.objectify``::
+
+ >>> from lxml import etree
+ >>> from lxml import objectify
+
+The objectify API is very different from the ElementTree API. If it
+is used, it should not be mixed with other element implementations
+(such as trees parsed with ``lxml.etree``), to avoid non-obvious
+behaviour.
+
+The `benchmark page`_ has some hints on performance optimisation of
+code using lxml.objectify.
+
+To make the doctests in this document look a little nicer, we also use
+this::
+
+ >>> import lxml.usedoctest
+
+Imported from within a doctest, this relieves us from caring about the exact
+formatting of XML output.
+
The lxml.objectify API
======================
Modified: lxml/branch/lxml-2.0/doc/pyrex.txt
==============================================================================
--- lxml/branch/lxml-2.0/doc/pyrex.txt (original)
+++ lxml/branch/lxml-2.0/doc/pyrex.txt Fri Feb 15 17:32:03 2008
@@ -22,4 +22,4 @@
clear description of what you did to run into the problems and provide the
compiler output that shows the error.
-.. Pyrex_: http://www.cosc.canterbury.ac.nz/~greg/python/Pyrex/
+.. _Pyrex: http://www.cosc.canterbury.ac.nz/~greg/python/Pyrex/
Modified: lxml/branch/lxml-2.0/doc/validation.txt
==============================================================================
--- lxml/branch/lxml-2.0/doc/validation.txt (original)
+++ lxml/branch/lxml-2.0/doc/validation.txt Fri Feb 15 17:32:03 2008
@@ -182,14 +182,23 @@
>>> log = relaxng.error_log
>>> print log.last_error
- :1:0:ERROR:RELAXNGV:ERR_LT_IN_ATTRIBUTE: Did not expect element c there
+ :1:0:ERROR:RELAXNGV:RELAXNG_ERR_ELEMWRONG: Did not expect element c there
You can see that the error (ERROR) happened during RelaxNG validation
-(RELAXNGV). The message then tells you what went wrong. Note that this error
-log is local to the RelaxNG object. It will only contain log entries that
-appeared during the validation. The DocumentInvalid exception raised by the
-``assertValid`` method above provides access to the global error log (like all
-other lxml exceptions).
+(RELAXNGV). The message then tells you what went wrong. You can also
+look at the error domain and its type directly::
+
+ >>> error = log.last_error
+ >>> print error.domain_name
+ RELAXNGV
+ >>> print error.type_name
+ RELAXNG_ERR_ELEMWRONG
+
+Note that this error log is local to the RelaxNG object. It will only
+contain log entries that appeared during the validation. The
+DocumentInvalid exception raised by the ``assertValid`` method above
+provides access to the global error log (like all other lxml
+exceptions).
Similar to XSLT, there's also a less efficient but easier shortcut method to
do one-shot RelaxNG validation::
Modified: lxml/branch/lxml-2.0/src/lxml/lxml.objectify.pyx
==============================================================================
--- lxml/branch/lxml-2.0/src/lxml/lxml.objectify.pyx (original)
+++ lxml/branch/lxml-2.0/src/lxml/lxml.objectify.pyx Fri Feb 15 17:32:03 2008
@@ -317,26 +317,17 @@
def __delitem__(self, key):
cdef Py_ssize_t start, stop, step, slicelength
+ parent = self.getparent()
+ if parent is None:
+ raise TypeError("deleting items not supported by root element")
if python.PySlice_Check(key):
# slice deletion
- python.PySlice_GetIndicesEx(
- key, _countSiblings(self._c_node),
- &start, &stop, &step, &slicelength)
- parent = self.getparent()
- if parent is None:
- raise TypeError("deleting slices of root element not supported")
- if step < 0:
- del_items = list(self)[start:stop:step]
- else:
- del_items = list(islice(self, start, stop, step))
+ del_items = list(self)[key]
remove = parent.remove
for el in del_items:
remove(el)
else:
# normal index deletion
- parent = self.getparent()
- if parent is None:
- raise TypeError("deleting items not supported by root element")
sibling = self.__getitem__(key)
parent.remove(sibling)
Modified: lxml/branch/lxml-2.0/src/lxml/xmlerror.pxd
==============================================================================
--- lxml/branch/lxml-2.0/src/lxml/xmlerror.pxd (original)
+++ lxml/branch/lxml-2.0/src/lxml/xmlerror.pxd Fri Feb 15 17:32:03 2008
@@ -767,6 +767,48 @@
XML_I18N_NO_OUTPUT = 6004 # 6004
XML_CHECK_ = 6005 # 5033
XML_CHECK_X = 6006 # 503
+
+ ctypedef enum xmlRelaxNGValidErr:
+ XML_RELAXNG_OK = 0
+ XML_RELAXNG_ERR_MEMORY = 1
+ XML_RELAXNG_ERR_TYPE = 2
+ XML_RELAXNG_ERR_TYPEVAL = 3
+ XML_RELAXNG_ERR_DUPID = 4
+ XML_RELAXNG_ERR_TYPECMP = 5
+ XML_RELAXNG_ERR_NOSTATE = 6
+ XML_RELAXNG_ERR_NODEFINE = 7
+ XML_RELAXNG_ERR_LISTEXTRA = 8
+ XML_RELAXNG_ERR_LISTEMPTY = 9
+ XML_RELAXNG_ERR_INTERNODATA = 10
+ XML_RELAXNG_ERR_INTERSEQ = 11
+ XML_RELAXNG_ERR_INTEREXTRA = 12
+ XML_RELAXNG_ERR_ELEMNAME = 13
+ XML_RELAXNG_ERR_ATTRNAME = 14
+ XML_RELAXNG_ERR_ELEMNONS = 15
+ XML_RELAXNG_ERR_ATTRNONS = 16
+ XML_RELAXNG_ERR_ELEMWRONGNS = 17
+ XML_RELAXNG_ERR_ATTRWRONGNS = 18
+ XML_RELAXNG_ERR_ELEMEXTRANS = 19
+ XML_RELAXNG_ERR_ATTREXTRANS = 20
+ XML_RELAXNG_ERR_ELEMNOTEMPTY = 21
+ XML_RELAXNG_ERR_NOELEM = 22
+ XML_RELAXNG_ERR_NOTELEM = 23
+ XML_RELAXNG_ERR_ATTRVALID = 24
+ XML_RELAXNG_ERR_CONTENTVALID = 25
+ XML_RELAXNG_ERR_EXTRACONTENT = 26
+ XML_RELAXNG_ERR_INVALIDATTR = 27
+ XML_RELAXNG_ERR_DATAELEM = 28
+ XML_RELAXNG_ERR_VALELEM = 29
+ XML_RELAXNG_ERR_LISTELEM = 30
+ XML_RELAXNG_ERR_DATATYPE = 31
+ XML_RELAXNG_ERR_VALUE = 32
+ XML_RELAXNG_ERR_LIST = 33
+ XML_RELAXNG_ERR_NOGRAMMAR = 34
+ XML_RELAXNG_ERR_EXTRADATA = 35
+ XML_RELAXNG_ERR_LACKDATA = 36
+ XML_RELAXNG_ERR_INTERNAL = 37
+ XML_RELAXNG_ERR_ELEMWRONG = 38
+ XML_RELAXNG_ERR_TEXTWRONG = 39
# --- END: GENERATED CONSTANTS ---
cdef extern from "libxml/xmlerror.h":
Modified: lxml/branch/lxml-2.0/src/lxml/xmlerror.pxi
==============================================================================
--- lxml/branch/lxml-2.0/src/lxml/xmlerror.pxi (original)
+++ lxml/branch/lxml-2.0/src/lxml/xmlerror.pxi Fri Feb 15 17:32:03 2008
@@ -88,7 +88,11 @@
property type_name:
def __get__(self):
- return ErrorTypes._getName(self.type, "unknown")
+ if self.domain == ErrorDomains.RELAXNGV:
+ getName = RelaxNGErrorTypes._getName
+ else:
+ getName = ErrorTypes._getName
+ return getName(self.type, "unknown")
property level_name:
def __get__(self):
@@ -523,9 +527,10 @@
cdef void __initErrorConstants():
"Called at setup time to parse the constants and build the classes below."
find_constants = re.compile(r"\s*([a-zA-Z0-9_]+)\s*=\s*([0-9]+)").findall
- const_defs = ((ErrorLevels, __ERROR_LEVELS),
- (ErrorDomains, __ERROR_DOMAINS),
- (ErrorTypes, __ERROR_TYPES))
+ const_defs = ((ErrorLevels, __ERROR_LEVELS),
+ (ErrorDomains, __ERROR_DOMAINS),
+ (ErrorTypes, __PARSER_ERROR_TYPES),
+ (RelaxNGErrorTypes, __RELAXNG_ERROR_TYPES))
for cls, constant_tuple in const_defs:
reverse_dict = {}
cls._names = reverse_dict
@@ -537,6 +542,7 @@
python.PyObject_SetAttr(cls, name, value)
python.PyDict_SetItem(reverse_dict, value, name)
+
class ErrorLevels:
"Libxml2 error levels"
@@ -546,6 +552,9 @@
class ErrorTypes:
"Libxml2 error types"
+class RelaxNGErrorTypes:
+ "Libxml2 RelaxNG error types"
+
# --- BEGIN: GENERATED CONSTANTS ---
# This section is generated by the script 'update-error-constants.py'.
@@ -596,8 +605,8 @@
I18N=27
""",)
-cdef object __ERROR_TYPES
-__ERROR_TYPES = ("""\
+cdef object __PARSER_ERROR_TYPES
+__PARSER_ERROR_TYPES = ("""\
ERR_OK=0
ERR_INTERNAL_ERROR=1
ERR_NO_MEMORY=2
@@ -1343,6 +1352,50 @@
CHECK_=6005
CHECK_X=6006
""",)
+
+cdef object __RELAXNG_ERROR_TYPES
+__RELAXNG_ERROR_TYPES = ("""\
+RELAXNG_OK=0
+RELAXNG_ERR_MEMORY=1
+RELAXNG_ERR_TYPE=2
+RELAXNG_ERR_TYPEVAL=3
+RELAXNG_ERR_DUPID=4
+RELAXNG_ERR_TYPECMP=5
+RELAXNG_ERR_NOSTATE=6
+RELAXNG_ERR_NODEFINE=7
+RELAXNG_ERR_LISTEXTRA=8
+RELAXNG_ERR_LISTEMPTY=9
+RELAXNG_ERR_INTERNODATA=10
+RELAXNG_ERR_INTERSEQ=11
+RELAXNG_ERR_INTEREXTRA=12
+RELAXNG_ERR_ELEMNAME=13
+RELAXNG_ERR_ATTRNAME=14
+RELAXNG_ERR_ELEMNONS=15
+RELAXNG_ERR_ATTRNONS=16
+RELAXNG_ERR_ELEMWRONGNS=17
+RELAXNG_ERR_ATTRWRONGNS=18
+RELAXNG_ERR_ELEMEXTRANS=19
+RELAXNG_ERR_ATTREXTRANS=20
+RELAXNG_ERR_ELEMNOTEMPTY=21
+RELAXNG_ERR_NOELEM=22
+RELAXNG_ERR_NOTELEM=23
+RELAXNG_ERR_ATTRVALID=24
+RELAXNG_ERR_CONTENTVALID=25
+RELAXNG_ERR_EXTRACONTENT=26
+RELAXNG_ERR_INVALIDATTR=27
+RELAXNG_ERR_DATAELEM=28
+RELAXNG_ERR_VALELEM=29
+RELAXNG_ERR_LISTELEM=30
+RELAXNG_ERR_DATATYPE=31
+RELAXNG_ERR_VALUE=32
+RELAXNG_ERR_LIST=33
+RELAXNG_ERR_NOGRAMMAR=34
+RELAXNG_ERR_EXTRADATA=35
+RELAXNG_ERR_LACKDATA=36
+RELAXNG_ERR_INTERNAL=37
+RELAXNG_ERR_ELEMWRONG=38
+RELAXNG_ERR_TEXTWRONG=39
+""",)
# --- END: GENERATED CONSTANTS ---
__initErrorConstants()
Modified: lxml/branch/lxml-2.0/update-error-constants.py
==============================================================================
--- lxml/branch/lxml-2.0/update-error-constants.py (original)
+++ lxml/branch/lxml-2.0/update-error-constants.py Fri Feb 15 17:32:03 2008
@@ -11,20 +11,29 @@
print sys.argv[0], "/path/to/libxml2-doc-dir"
sys.exit(len(sys.argv) > 1)
-HTML_FILE = os.path.join(sys.argv[1], 'html', 'libxml-xmlerror.html')
-os.stat(HTML_FILE) # raise an error if we can't find it
+HTML_DIR = os.path.join(sys.argv[1], 'html')
+os.stat(HTML_DIR) # raise an error if we can't find it
sys.path.insert(0, 'src')
from lxml import etree
# map enum name to Python variable name and alignment for constant name
ENUM_MAP = {
- 'xmlErrorLevel' : ('__ERROR_LEVELS', 'XML_ERR_'),
- 'xmlErrorDomain' : ('__ERROR_DOMAINS', 'XML_FROM_'),
- 'xmlParserErrors' : ('__ERROR_TYPES', 'XML_')
+ 'xmlErrorLevel' : ('__ERROR_LEVELS', 'XML_ERR_'),
+ 'xmlErrorDomain' : ('__ERROR_DOMAINS', 'XML_FROM_'),
+ 'xmlParserErrors' : ('__PARSER_ERROR_TYPES', 'XML_'),
+# 'xmlXPathError' : ('__XPATH_ERROR_TYPES', ''),
+# 'xmlSchemaValidError' : ('__XMLSCHEMA_ERROR_TYPES', 'XML_'),
+ 'xmlRelaxNGValidErr' : ('__RELAXNG_ERROR_TYPES', 'XML_'),
}
-ENUM_ORDER = ('xmlErrorLevel', 'xmlErrorDomain', 'xmlParserErrors')
+ENUM_ORDER = (
+ 'xmlErrorLevel',
+ 'xmlErrorDomain',
+ 'xmlParserErrors',
+# 'xmlXPathError',
+# 'xmlSchemaValidError',
+ 'xmlRelaxNGValidErr')
COMMENT = """
# This section is generated by the script '%s'.
@@ -61,27 +70,43 @@
f.write(''.join(post))
f.close()
-def parse_enums(html_file):
+collect_text = etree.XPath("string()")
+find_enums = etree.XPath(
+ "//html:pre[@class = 'programlisting' and contains(text(), 'Enum')]",
+ namespaces = {'html' : 'http://www.w3.org/1999/xhtml'})
+
+def parse_enums(html_dir, html_filename, enum_dict):
PARSE_ENUM_NAME = re.compile('\s*enum\s+(\w+)\s*{', re.I).match
PARSE_ENUM_VALUE = re.compile('\s*=\s+([0-9]+)\s*(?::\s*(.*))?').match
- tree = etree.parse(html_file)
- xpath = etree.XPathEvaluator(
- tree, namespaces={'html' : 'http://www.w3.org/1999/xhtml'})
-
- enum_dict = {}
- enums = xpath.evaluate("//html:pre[@class = 'programlisting' and contains(text(), 'Enum') and html:a[@name]]")
+ tree = etree.parse(os.path.join(html_dir, html_filename))
+ enums = find_enums(tree)
for enum in enums:
- enum_name = PARSE_ENUM_NAME(enum.text).group(1)
+ enum_name = PARSE_ENUM_NAME(collect_text(enum))
+ if not enum_name:
+ continue
+ enum_name = enum_name.group(1)
+ if enum_name not in ENUM_MAP:
+ continue
print "Found enum", enum_name
entries = []
- enum_dict[enum_name] = entries
for child in enum:
name = child.text
- value, descr = PARSE_ENUM_VALUE(child.tail).groups()
+ match = PARSE_ENUM_VALUE(child.tail)
+ if not match:
+ print("Ignoring enum %s (failed to parse field '%s')" % (
+ enum_name, name))
+ break
+ value, descr = match.groups()
entries.append((name, int(value), descr))
+ else:
+ enum_dict[enum_name] = entries
return enum_dict
-enum_dict = parse_enums(HTML_FILE)
+enum_dict = {}
+parse_enums(HTML_DIR, 'libxml-xmlerror.html', enum_dict)
+#parse_enums(HTML_DIR, 'libxml-xpath.html', enum_dict)
+#parse_enums(HTML_DIR, 'libxml-xmlschemas.html', enum_dict)
+parse_enums(HTML_DIR, 'libxml-relaxng.html', enum_dict)
# regenerate source files
pxi_result = []
From scoder at codespeak.net Mon Feb 18 11:15:01 2008
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Mon, 18 Feb 2008 11:15:01 +0100 (CET)
Subject: [Lxml-checkins] r51574 - in lxml/trunk: . doc
Message-ID: <20080218101501.4F32C16842D@codespeak.net>
Author: scoder
Date: Mon Feb 18 11:15:00 2008
New Revision: 51574
Modified:
lxml/trunk/ (props changed)
lxml/trunk/doc/lxml-source-howto.txt
Log:
r3535 at delle: sbehnel | 2008-02-15 18:56:52 +0100
source howto update
Modified: lxml/trunk/doc/lxml-source-howto.txt
==============================================================================
--- lxml/trunk/doc/lxml-source-howto.txt (original)
+++ lxml/trunk/doc/lxml-source-howto.txt Mon Feb 18 11:15:00 2008
@@ -16,9 +16,19 @@
.. _lxml: http://codespeak.net/lxml
.. _`how to build lxml from sources`: build.html
.. _`ReStructured Text`: http://docutils.sourceforge.net/rst.html
+.. _epydoc: http://epydoc.sourceforge.net/
+.. _docutils: http://docutils.sourceforge.net/
+.. _`C-level API`: capi.html
.. contents::
..
+ 1 What is Cython?
+ 2 Where to start?
+ 2.1 Concepts
+ 2.2 The documentation
+ 3 lxml.etree
+ 4 lxml.objectify
+ 5 lxml.html
What is Cython?
@@ -29,21 +39,23 @@
Cython_ is the language that lxml is written in. It is a very
Python-like language that was specifically designed for writing Python
-extension modules. The language is so close to Python that the Cython
-compiler can actually compile many, many Python programs to C without
-major modifications. But the real speed gains of a C compilation come
-from type annotations that were added to the language and that allow
-Cython to generate very efficient C code.
+extension modules.
The reason why Cython (or actually its predecessor Pyrex_ at the time)
was chosen as an implementation language for lxml, is that it makes it
very easy to interface with both the Python world and external C code.
Cython generates all the necessary glue code for the Python API,
-including Python types and reference counting for Python objects.
-Calling into C code is not more than declaring the signature of the
-function and maybe some variables as being C types, pointers or
-structs, and then calling it. The rest of the code is just plain
-Python code.
+including Python types, calling conventions and reference counting.
+On the other side of the table, calling into C code is not more than
+declaring the signature of the function and maybe some variables as
+being C types, pointers or structs, and then calling it. The rest of
+the code is just plain Python code.
+
+The Cython language is so close to Python that the Cython compiler can
+actually compile many, many Python programs to C without major
+modifications. But the real speed gains of a C compilation come from
+type annotations that were added to the language and that allow Cython
+to generate very efficient C code.
Even if you are not familiar with Cython, you should keep in mind that
a slow implementation of a feature is better than none. So, if you
@@ -56,7 +68,7 @@
Where to start?
===============
-First of all, read `how to build lxml from sources` to learn how to
+First of all, read `how to build lxml from sources`_ to learn how to
retrieve the source code from the Subversion repository and how to
build it. The source code lives in the subdirectory ``src`` of the
checkout.
@@ -65,21 +77,12 @@
``lxml.objectify``. All main modules have the file extension
``.pyx``, which shows the descendence from Pyrex. As usual in Python,
the main files start with a short description and a couple of imports.
-Cython destinguishes between the run-time ``import`` statement (as
+Cython distinguishes between the run-time ``import`` statement (as
known from Python) and the compile-time ``cimport`` statement, which
imports C declarations, either from external libraries or from other
Cython modules.
-The documentation
------------------
-
-* docs in ``doc`` directory
-* `ReStructured Text`_ format
-* generated through ``mkhtml.py`` script
-* ...
-
-
Concepts
--------
@@ -88,6 +91,38 @@
* ...
+The documentation
+-----------------
+
+An important part of lxml is the documentation that lives in the
+``doc`` directory. It describes a large part of the API and comprises
+a lot of example code in the form of doctests.
+
+The documentation is written in the `ReStructured Text`_ format, a
+very powerful text markup language that looks almost like plain text.
+It is part of the docutils_ package.
+
+The project web site of lxml_ is completely generated from these text
+documents. Even the side menu is just collected from the table of
+contents that the ReST processor writes into each HTML page.
+Obviously, we use lxml for this.
+
+The easiest way to generate the HTML pages is by calling::
+
+ make html
+
+This will call the script ``doc/mkhtml.py`` to run the ReST processor
+on the files. After generating an HTML page the script parses it back
+in to build the side menu, and injects the complete menu into each
+page at the very end.
+
+Running the ``make`` command will also generate the API documentation
+if you have epydoc_ installed. The epydoc package will import and
+introspect the extension modules and also introspect and parse the
+Python modules of lxml. The aggregated information will then be
+written out into an HTML documentation site.
+
+
lxml.etree
==========
@@ -104,14 +139,7 @@
The main include files are:
-proxy.pxi:
-
- Very low-level functions for memory allocation/deallocation
- and Element proxy handling. Ignoring this for the beginning
- will keep your head from exploding.
-
-apihelpers.pxi:
-
+apihelpers.pxi
Private C helper functions. Most of the little functions that are
used all over the place are defined here. This includes things
like reading out the text content of a libxml2 tree node, checking
@@ -120,77 +148,100 @@
should keep these functions in the back of your head, as they will
definitely make your life easier.
-xmlerror.pxi:
-
- Error log handling. All error messages that libxml2 generates
- internally walk through the code in this file to end up in lxml's
- Python level error logs.
-
- At the end of the file, you will find a long list of named error
- codes. It is generated from the libxml2 HTML documentation (using
- lxml, of course). See the script ``update-error-constants.py``
- for this.
-
-classlookup.pxi:
-
+classlookup.pxi
Element class lookup mechanisms. The main API and engines for
those who want to define custom Element classes and inject them
into lxml.
-nsclasses.pxi:
+docloader.pxi
+ Support for custom document loaders. Base class and registry for
+ custom document resolvers.
+
+extensions.pxi
+ Infrastructure for extension functions in XPath/XSLT, including
+ XPath value conversion and function registration.
+iterparse.pxi
+ Incremental XML parsing. An iterator class that builds iterparse
+ events while parsing.
+
+nsclasses.pxi
Namespace implementation and registry. The registry and engine
for Element classes that use the ElementNamespaceClassLookup
scheme.
-docloader.pxi:
-
- Support for custom document loaders. Base class and registry for
- custom document resolvers.
-
-parser.pxi:
-
+parser.pxi
Parsers for XML and HTML. This is the main parser engine. It's
the reason why you can parse a document from various sources in
two lines of Python code. It's definitely not the right place to
start reading lxml's source code.
-parsertarget.pxi:
+parsertarget.pxi
+ An ElementTree compatible parser target implementation based on
+ the SAX2 interface of libxml2.
- ET Parser target.
+proxy.pxi
+ Very low-level functions for memory allocation/deallocation
+ and Element proxy handling. Ignoring this for the beginning
+ will save your head from exploding.
-serializer.pxi:
+public-api.pxi
+ The set of C functions that are exported to other extension
+ modules at the C level. For example, ``lxml.objectify`` makes use
+ of these. See the `C-level API`_ documentation.
+serializer.pxi
XML output functions. Basically everything that creates byte
sequences from XML trees.
-iterparse.pxi:
+xinclude.pxi
+ XInclude implementation.
- Incremental XML parsing. An iterator class that builds iterparse
- events while parsing.
+xmlerror.pxi
+ Error log handling. All error messages that libxml2 generates
+ internally walk through the code in this file to end up in lxml's
+ Python level error logs.
-xmlid.pxi:
+ At the end of the file, you will find a long list of named error
+ codes. It is generated from the libxml2 HTML documentation (using
+ lxml, of course). See the script ``update-error-constants.py``
+ for this.
+xmlid.pxi
XMLID and IDDict, a dictionary-like way to find Elements by their
XML-ID attribute.
-xinclude.pxi:
+xpath.pxi
+ XPath evaluators.
- XInclude implementation.
+xslt.pxi
+ XSL transformations, including the ``XSLT`` class, document lookup
+ handling and access control.
-extensions.pxi:
+The different schema languages (DTD, RelaxNG, XML Schema and
+Schematron) are implemented in the following include files:
- Infrastructure for extension functions in XPath/XSLT, including
- XPath value conversion and function registration.
+* dtd.pxi
+* relaxng.pxi
+* schematron.pxi
+* xmlschema.pxi
-xpath.pxi:
- XPath evaluators.
+Python modules
+==============
-xslt.pxi:
+The ``lxml`` package also contains a number of pure Python modules:
- XSL transformations, including the ``XSLT`` class, document lookup
- handling and access control.
+builder.py
+ The E-factory and the ElementBuilder class. These provide a
+ simple interface to XML tree generation.
+
+cssselect.py
+ A CSS selector implementation based on XPath. The main class is
+ called ``CSSSelector``.
+
+doctestcompare.py
+
lxml.objectify
From scoder at codespeak.net Mon Feb 18 11:15:06 2008
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Mon, 18 Feb 2008 11:15:06 +0100 (CET)
Subject: [Lxml-checkins] r51575 - in lxml/trunk: . doc
Message-ID: <20080218101506.1BBDA1684CD@codespeak.net>
Author: scoder
Date: Mon Feb 18 11:15:05 2008
New Revision: 51575
Modified:
lxml/trunk/ (props changed)
lxml/trunk/doc/lxml-source-howto.txt
Log:
r3536 at delle: sbehnel | 2008-02-18 10:34:18 +0100
source doc
Modified: lxml/trunk/doc/lxml-source-howto.txt
==============================================================================
--- lxml/trunk/doc/lxml-source-howto.txt (original)
+++ lxml/trunk/doc/lxml-source-howto.txt Mon Feb 18 11:15:05 2008
@@ -241,7 +241,19 @@
called ``CSSSelector``.
doctestcompare.py
-
+ ...
+
+ElementInclude.py
+ ...
+
+_elementpath.py
+ ...
+
+sax.py
+ ...
+
+usedoctest.py
+ ...
lxml.objectify
From scoder at codespeak.net Mon Feb 18 11:19:44 2008
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Mon, 18 Feb 2008 11:19:44 +0100 (CET)
Subject: [Lxml-checkins] r51576 - lxml/branch/lxml-2.0/doc
Message-ID: <20080218101944.0981616842D@codespeak.net>
Author: scoder
Date: Mon Feb 18 11:19:44 2008
New Revision: 51576
Modified:
lxml/branch/lxml-2.0/doc/lxml-source-howto.txt
Log:
trunk merge
Modified: lxml/branch/lxml-2.0/doc/lxml-source-howto.txt
==============================================================================
--- lxml/branch/lxml-2.0/doc/lxml-source-howto.txt (original)
+++ lxml/branch/lxml-2.0/doc/lxml-source-howto.txt Mon Feb 18 11:19:44 2008
@@ -16,9 +16,19 @@
.. _lxml: http://codespeak.net/lxml
.. _`how to build lxml from sources`: build.html
.. _`ReStructured Text`: http://docutils.sourceforge.net/rst.html
+.. _epydoc: http://epydoc.sourceforge.net/
+.. _docutils: http://docutils.sourceforge.net/
+.. _`C-level API`: capi.html
.. contents::
..
+ 1 What is Cython?
+ 2 Where to start?
+ 2.1 Concepts
+ 2.2 The documentation
+ 3 lxml.etree
+ 4 lxml.objectify
+ 5 lxml.html
What is Cython?
@@ -29,21 +39,23 @@
Cython_ is the language that lxml is written in. It is a very
Python-like language that was specifically designed for writing Python
-extension modules. The language is so close to Python that the Cython
-compiler can actually compile many, many Python programs to C without
-major modifications. But the real speed gains of a C compilation come
-from type annotations that were added to the language and that allow
-Cython to generate very efficient C code.
+extension modules.
The reason why Cython (or actually its predecessor Pyrex_ at the time)
was chosen as an implementation language for lxml, is that it makes it
very easy to interface with both the Python world and external C code.
Cython generates all the necessary glue code for the Python API,
-including Python types and reference counting for Python objects.
-Calling into C code is not more than declaring the signature of the
-function and maybe some variables as being C types, pointers or
-structs, and then calling it. The rest of the code is just plain
-Python code.
+including Python types, calling conventions and reference counting.
+On the other side of the table, calling into C code is not more than
+declaring the signature of the function and maybe some variables as
+being C types, pointers or structs, and then calling it. The rest of
+the code is just plain Python code.
+
+The Cython language is so close to Python that the Cython compiler can
+actually compile many, many Python programs to C without major
+modifications. But the real speed gains of a C compilation come from
+type annotations that were added to the language and that allow Cython
+to generate very efficient C code.
Even if you are not familiar with Cython, you should keep in mind that
a slow implementation of a feature is better than none. So, if you
@@ -56,7 +68,7 @@
Where to start?
===============
-First of all, read `how to build lxml from sources` to learn how to
+First of all, read `how to build lxml from sources`_ to learn how to
retrieve the source code from the Subversion repository and how to
build it. The source code lives in the subdirectory ``src`` of the
checkout.
@@ -65,21 +77,12 @@
``lxml.objectify``. All main modules have the file extension
``.pyx``, which shows the descendence from Pyrex. As usual in Python,
the main files start with a short description and a couple of imports.
-Cython destinguishes between the run-time ``import`` statement (as
+Cython distinguishes between the run-time ``import`` statement (as
known from Python) and the compile-time ``cimport`` statement, which
imports C declarations, either from external libraries or from other
Cython modules.
-The documentation
------------------
-
-* docs in ``doc`` directory
-* `ReStructured Text`_ format
-* generated through ``mkhtml.py`` script
-* ...
-
-
Concepts
--------
@@ -88,6 +91,38 @@
* ...
+The documentation
+-----------------
+
+An important part of lxml is the documentation that lives in the
+``doc`` directory. It describes a large part of the API and comprises
+a lot of example code in the form of doctests.
+
+The documentation is written in the `ReStructured Text`_ format, a
+very powerful text markup language that looks almost like plain text.
+It is part of the docutils_ package.
+
+The project web site of lxml_ is completely generated from these text
+documents. Even the side menu is just collected from the table of
+contents that the ReST processor writes into each HTML page.
+Obviously, we use lxml for this.
+
+The easiest way to generate the HTML pages is by calling::
+
+ make html
+
+This will call the script ``doc/mkhtml.py`` to run the ReST processor
+on the files. After generating an HTML page the script parses it back
+in to build the side menu, and injects the complete menu into each
+page at the very end.
+
+Running the ``make`` command will also generate the API documentation
+if you have epydoc_ installed. The epydoc package will import and
+introspect the extension modules and also introspect and parse the
+Python modules of lxml. The aggregated information will then be
+written out into an HTML documentation site.
+
+
lxml.etree
==========
@@ -104,14 +139,7 @@
The main include files are:
-proxy.pxi:
-
- Very low-level functions for memory allocation/deallocation
- and Element proxy handling. Ignoring this for the beginning
- will keep your head from exploding.
-
-apihelpers.pxi:
-
+apihelpers.pxi
Private C helper functions. Most of the little functions that are
used all over the place are defined here. This includes things
like reading out the text content of a libxml2 tree node, checking
@@ -120,77 +148,112 @@
should keep these functions in the back of your head, as they will
definitely make your life easier.
-xmlerror.pxi:
-
- Error log handling. All error messages that libxml2 generates
- internally walk through the code in this file to end up in lxml's
- Python level error logs.
-
- At the end of the file, you will find a long list of named error
- codes. It is generated from the libxml2 HTML documentation (using
- lxml, of course). See the script ``update-error-constants.py``
- for this.
-
-classlookup.pxi:
-
+classlookup.pxi
Element class lookup mechanisms. The main API and engines for
those who want to define custom Element classes and inject them
into lxml.
-nsclasses.pxi:
+docloader.pxi
+ Support for custom document loaders. Base class and registry for
+ custom document resolvers.
+
+extensions.pxi
+ Infrastructure for extension functions in XPath/XSLT, including
+ XPath value conversion and function registration.
+
+iterparse.pxi
+ Incremental XML parsing. An iterator class that builds iterparse
+ events while parsing.
+nsclasses.pxi
Namespace implementation and registry. The registry and engine
for Element classes that use the ElementNamespaceClassLookup
scheme.
-docloader.pxi:
-
- Support for custom document loaders. Base class and registry for
- custom document resolvers.
-
-parser.pxi:
-
+parser.pxi
Parsers for XML and HTML. This is the main parser engine. It's
the reason why you can parse a document from various sources in
two lines of Python code. It's definitely not the right place to
start reading lxml's source code.
-parsertarget.pxi:
+parsertarget.pxi
+ An ElementTree compatible parser target implementation based on
+ the SAX2 interface of libxml2.
- ET Parser target.
+proxy.pxi
+ Very low-level functions for memory allocation/deallocation
+ and Element proxy handling. Ignoring this for the beginning
+ will save your head from exploding.
-serializer.pxi:
+public-api.pxi
+ The set of C functions that are exported to other extension
+ modules at the C level. For example, ``lxml.objectify`` makes use
+ of these. See the `C-level API`_ documentation.
+serializer.pxi
XML output functions. Basically everything that creates byte
sequences from XML trees.
-iterparse.pxi:
+xinclude.pxi
+ XInclude implementation.
- Incremental XML parsing. An iterator class that builds iterparse
- events while parsing.
+xmlerror.pxi
+ Error log handling. All error messages that libxml2 generates
+ internally walk through the code in this file to end up in lxml's
+ Python level error logs.
-xmlid.pxi:
+ At the end of the file, you will find a long list of named error
+ codes. It is generated from the libxml2 HTML documentation (using
+ lxml, of course). See the script ``update-error-constants.py``
+ for this.
+xmlid.pxi
XMLID and IDDict, a dictionary-like way to find Elements by their
XML-ID attribute.
-xinclude.pxi:
+xpath.pxi
+ XPath evaluators.
- XInclude implementation.
+xslt.pxi
+ XSL transformations, including the ``XSLT`` class, document lookup
+ handling and access control.
-extensions.pxi:
+The different schema languages (DTD, RelaxNG, XML Schema and
+Schematron) are implemented in the following include files:
- Infrastructure for extension functions in XPath/XSLT, including
- XPath value conversion and function registration.
+* dtd.pxi
+* relaxng.pxi
+* schematron.pxi
+* xmlschema.pxi
-xpath.pxi:
- XPath evaluators.
+Python modules
+==============
-xslt.pxi:
+The ``lxml`` package also contains a number of pure Python modules:
- XSL transformations, including the ``XSLT`` class, document lookup
- handling and access control.
+builder.py
+ The E-factory and the ElementBuilder class. These provide a
+ simple interface to XML tree generation.
+
+cssselect.py
+ A CSS selector implementation based on XPath. The main class is
+ called ``CSSSelector``.
+
+doctestcompare.py
+ ...
+
+ElementInclude.py
+ ...
+
+_elementpath.py
+ ...
+
+sax.py
+ ...
+
+usedoctest.py
+ ...
lxml.objectify
From ianb at codespeak.net Mon Feb 18 18:44:12 2008
From: ianb at codespeak.net (ianb at codespeak.net)
Date: Mon, 18 Feb 2008 18:44:12 +0100 (CET)
Subject: [Lxml-checkins] r51601 - lxml/trunk/src/lxml/html
Message-ID: <20080218174412.C933D1683DD@codespeak.net>
Author: ianb
Date: Mon Feb 18 18:44:09 2008
New Revision: 51601
Modified:
lxml/trunk/src/lxml/html/__init__.py
Log:
Make getter/setter methods private
Modified: lxml/trunk/src/lxml/html/__init__.py
==============================================================================
--- lxml/trunk/src/lxml/html/__init__.py (original)
+++ lxml/trunk/src/lxml/html/__init__.py Mon Feb 18 18:44:09 2008
@@ -67,7 +67,7 @@
return self.xpath('//head')[0]
head = property(head, doc=head.__doc__)
- def label__get(self):
+ def _label__get(self):
"""
Get or set any ", base_url="http://no/such/url")
Modified: lxml/trunk/src/lxml/xmlid.pxi
==============================================================================
--- lxml/trunk/src/lxml/xmlid.pxi (original)
+++ lxml/trunk/src/lxml/xmlid.pxi Thu Feb 21 17:19:36 2008
@@ -40,7 +40,7 @@
else:
return (root, _IDDict(root))
-def parseid(source, parser=None):
+def parseid(source, parser=None, *, base_url=None):
"""parseid(source, parser=None)
Parses the source into a tuple containing an ElementTree object and an
@@ -51,7 +51,7 @@
The results are undefined.
"""
cdef _Document doc
- doc = _parseDocument(source, parser)
+ doc = _parseDocument(source, parser, base_url)
return (_elementTreeFactory(doc, None), _IDDict(doc))
cdef class _IDDict:
Modified: lxml/trunk/src/lxml/xmlschema.pxi
==============================================================================
--- lxml/trunk/src/lxml/xmlschema.pxi (original)
+++ lxml/trunk/src/lxml/xmlschema.pxi Thu Feb 21 17:19:36 2008
@@ -58,7 +58,7 @@
self._error_log.connect()
parser_ctxt = xmlschema.xmlSchemaNewParserCtxt(_cstr(filename))
else:
- doc = _parseDocument(file, None)
+ doc = _parseDocument(file, None, None)
self._error_log.connect()
parser_ctxt = xmlschema.xmlSchemaNewDocParserCtxt(doc._c_doc)
else:
From scoder at codespeak.net Thu Feb 21 17:19:41 2008
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Thu, 21 Feb 2008 17:19:41 +0100 (CET)
Subject: [Lxml-checkins] r51740 - in lxml/trunk: . src/lxml
Message-ID: <20080221161941.620C91684DB@codespeak.net>
Author: scoder
Date: Thu Feb 21 17:19:41 2008
New Revision: 51740
Modified:
lxml/trunk/ (props changed)
lxml/trunk/src/lxml/proxy.pxi
Log:
r3550 at delle: sbehnel | 2008-02-19 20:23:54 +0100
cleanup
Modified: lxml/trunk/src/lxml/proxy.pxi
==============================================================================
--- lxml/trunk/src/lxml/proxy.pxi (original)
+++ lxml/trunk/src/lxml/proxy.pxi Thu Feb 21 17:19:41 2008
@@ -281,20 +281,22 @@
if c_cache_size == 0:
c_cache_size = 20
else:
- c_cache_size = c_cache_size * 2
+ c_cache_size *= 2
c_ns_new_cache = python.PyMem_Realloc(
c_ns_new_cache, c_cache_size * sizeof(xmlNs*))
if c_ns_new_cache is NULL:
python.PyMem_Free(c_ns_old_cache)
python.PyErr_NoMemory()
+ return -1
c_ns_old_cache = python.PyMem_Realloc(
c_ns_old_cache, c_cache_size * sizeof(xmlNs*))
if c_ns_old_cache is NULL:
python.PyMem_Free(c_ns_new_cache)
python.PyErr_NoMemory()
+ return -1
c_ns_new_cache[c_cache_last] = c_new_ns
c_ns_old_cache[c_cache_last] = c_node.ns
- c_cache_last = c_cache_last + 1
+ c_cache_last += 1
c_node.ns = c_new_ns
if c_node is c_element:
# after the element, continue with its attributes
From scoder at codespeak.net Thu Feb 21 17:19:44 2008
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Thu, 21 Feb 2008 17:19:44 +0100 (CET)
Subject: [Lxml-checkins] r51741 - lxml/trunk
Message-ID: <20080221161944.9873F1684E4@codespeak.net>
Author: scoder
Date: Thu Feb 21 17:19:44 2008
New Revision: 51741
Modified:
lxml/trunk/ (props changed)
lxml/trunk/Makefile
Log:
r3551 at delle: sbehnel | 2008-02-19 21:51:25 +0100
gdb make target
Modified: lxml/trunk/Makefile
==============================================================================
--- lxml/trunk/Makefile (original)
+++ lxml/trunk/Makefile Thu Feb 21 17:19:44 2008
@@ -24,6 +24,10 @@
valgrind --tool=memcheck --leak-check=full --num-callers=30 --suppressions=valgrind-python.supp \
$(PYTHON) test.py
+gdb_test_inplace: inplace
+ @echo -e "file $(PYTHON)\nrun test.py" > .gdb.command
+ gdb -x .gdb.command -d src -d src/lxml
+
bench_inplace: inplace
$(PYTHON) benchmark/bench_etree.py -i
$(PYTHON) benchmark/bench_xpath.py -i
@@ -51,6 +55,8 @@
valtest: valgrind_test_inplace
+gdbtest: gdb_test_inplace
+
bench: bench_inplace
ftest: ftest_inplace
From scoder at codespeak.net Thu Feb 21 17:19:50 2008
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Thu, 21 Feb 2008 17:19:50 +0100 (CET)
Subject: [Lxml-checkins] r51742 - in lxml/trunk: . src/lxml/tests
Message-ID: <20080221161950.5E2BE1684EE@codespeak.net>
Author: scoder
Date: Thu Feb 21 17:19:49 2008
New Revision: 51742
Modified:
lxml/trunk/ (props changed)
lxml/trunk/src/lxml/tests/test_css.py
lxml/trunk/src/lxml/tests/test_elementtree.py
lxml/trunk/src/lxml/tests/test_errors.py
lxml/trunk/src/lxml/tests/test_io.py
lxml/trunk/src/lxml/tests/test_unicode.py
Log:
r3552 at delle: sbehnel | 2008-02-19 22:43:13 +0100
let all test case classes inherit from HelperTestCase
Modified: lxml/trunk/src/lxml/tests/test_css.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_css.py (original)
+++ lxml/trunk/src/lxml/tests/test_css.py Thu Feb 21 17:19:49 2008
@@ -1,5 +1,5 @@
import unittest, sys
-from lxml.tests.common_imports import doctest
+from lxml.tests.common_imports import doctest, HelperTestCase
from lxml import html
from lxml import cssselect
import os
@@ -9,7 +9,7 @@
# Data borrowed from http://mootools.net/slickspeed/
-class CSSTestCase(unittest.TestCase):
+class CSSTestCase(HelperTestCase):
selectors = [
## Changed from original; probably because I'm only searching the body
@@ -59,7 +59,7 @@
def __init__(self, index):
self.index = index
- unittest.TestCase.__init__(self)
+ super(HelperTestCase, self).__init__()
def all(cls):
for i in range(len(cls.selectors)):
Modified: lxml/trunk/src/lxml/tests/test_elementtree.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_elementtree.py (original)
+++ lxml/trunk/src/lxml/tests/test_elementtree.py Thu Feb 21 17:19:49 2008
@@ -12,7 +12,7 @@
import os, re, tempfile, copy, operator, gc
from common_imports import StringIO, etree, ElementTree, cElementTree
-from common_imports import fileInTestDir, canonicalize
+from common_imports import fileInTestDir, canonicalize, HelperTestCase
if cElementTree is not None:
if tuple([int(n) for n in
@@ -27,12 +27,9 @@
seq = list(seq)[::-1]
return seq
-class ETreeTestCaseBase(unittest.TestCase):
+class ETreeTestCaseBase(HelperTestCase):
etree = None
- def tearDown(self):
- gc.collect()
-
def test_element(self):
for i in range(10):
e = self.etree.Element('foo')
@@ -3290,12 +3287,6 @@
mapping["key"] = "value"
self.assertEquals("value", mapping["key"])
- # assertFalse doesn't exist in Python 2.3
- try:
- unittest.TestCase.assertFalse
- except AttributeError:
- assertFalse = unittest.TestCase.failIf
-
if etree:
class ETreeTestCase(ETreeTestCaseBase):
Modified: lxml/trunk/src/lxml/tests/test_errors.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_errors.py (original)
+++ lxml/trunk/src/lxml/tests/test_errors.py Thu Feb 21 17:19:49 2008
@@ -9,7 +9,9 @@
import sys, gc
from lxml import etree
-class ErrorTestCase(unittest.TestCase):
+from common_imports import HelperTestCase
+
+class ErrorTestCase(HelperTestCase):
etree = etree
def test_bad_element(self):
Modified: lxml/trunk/src/lxml/tests/test_io.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_io.py (original)
+++ lxml/trunk/src/lxml/tests/test_io.py Thu Feb 21 17:19:49 2008
@@ -8,9 +8,9 @@
import tempfile, gzip, os, gc, shutil
from common_imports import etree, ElementTree, fileInTestDir
-from common_imports import SillyFileLike, LargeFileLike
+from common_imports import SillyFileLike, LargeFileLike, HelperTestCase
-class IOTestCaseBase(unittest.TestCase):
+class IOTestCaseBase(HelperTestCase):
"""(c)ElementTree compatibility for IO functions/methods
"""
etree = None
Modified: lxml/trunk/src/lxml/tests/test_unicode.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_unicode.py (original)
+++ lxml/trunk/src/lxml/tests/test_unicode.py Thu Feb 21 17:19:49 2008
@@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
import unittest, doctest
-from common_imports import StringIO, etree, SillyFileLike
+from common_imports import StringIO, etree, SillyFileLike, HelperTestCase
ascii_uni = u'a'
@@ -13,7 +13,7 @@
uxml = u"test ??\u3120
page ??\u3120 title
"
-class UnicodeTestCase(unittest.TestCase):
+class UnicodeTestCase(HelperTestCase):
def test_unicode_xml(self):
tree = etree.XML(u'
%s
' % uni)
self.assertEquals(uni, tree.text)
From scoder at codespeak.net Thu Feb 21 17:19:54 2008
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Thu, 21 Feb 2008 17:19:54 +0100 (CET)
Subject: [Lxml-checkins] r51743 - in lxml/trunk: . src/lxml
Message-ID: <20080221161954.91EFD1684E6@codespeak.net>
Author: scoder
Date: Thu Feb 21 17:19:54 2008
New Revision: 51743
Modified:
lxml/trunk/ (props changed)
lxml/trunk/src/lxml/tree.pxd
lxml/trunk/src/lxml/xmlparser.pxd
Log:
r3553 at delle: sbehnel | 2008-02-19 22:44:41 +0100
libxml2 debugging functions
Modified: lxml/trunk/src/lxml/tree.pxd
==============================================================================
--- lxml/trunk/src/lxml/tree.pxd (original)
+++ lxml/trunk/src/lxml/tree.pxd Thu Feb 21 17:19:54 2008
@@ -305,6 +305,7 @@
cdef extern from "libxml/xmlmemory.h":
cdef void* xmlMalloc(size_t size) nogil
+ cdef int xmlMemBlocks() nogil
cdef extern from "etree_defs.h":
cdef bint _isElement(xmlNode* node) nogil
Modified: lxml/trunk/src/lxml/xmlparser.pxd
==============================================================================
--- lxml/trunk/src/lxml/xmlparser.pxd (original)
+++ lxml/trunk/src/lxml/xmlparser.pxd Thu Feb 21 17:19:54 2008
@@ -120,6 +120,8 @@
XML_PARSE_COMPACT = 65536 # compact small text nodes
cdef void xmlInitParser() nogil
+ cdef void xmlCleanupParser() nogil
+
cdef int xmlLineNumbersDefault(int onoff) nogil
cdef xmlParserCtxt* xmlNewParserCtxt() nogil
cdef xmlParserInput* xmlNewIOInputStream(xmlParserCtxt* ctxt,
From scoder at codespeak.net Thu Feb 21 17:19:58 2008
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Thu, 21 Feb 2008 17:19:58 +0100 (CET)
Subject: [Lxml-checkins] r51744 - in lxml/trunk: . src/lxml
Message-ID: <20080221161958.917741684F1@codespeak.net>
Author: scoder
Date: Thu Feb 21 17:19:58 2008
New Revision: 51744
Modified:
lxml/trunk/ (props changed)
lxml/trunk/CHANGES.txt
lxml/trunk/src/lxml/schematron.pxi
Log:
r3554 at delle: sbehnel | 2008-02-19 23:04:07 +0100
fix schematron memory leak after crash bug was fixed in libxml2 2.6.31
Modified: lxml/trunk/CHANGES.txt
==============================================================================
--- lxml/trunk/CHANGES.txt (original)
+++ lxml/trunk/CHANGES.txt Thu Feb 21 17:19:58 2008
@@ -14,6 +14,8 @@
Bugs fixed
----------
+* Memory leak in Schematron (fixed only for libxml2 2.6.31+).
+
* Error type names in RelaxNG were reported incorrectly.
* Slice deletion bug fixed in objectify.
Modified: lxml/trunk/src/lxml/schematron.pxi
==============================================================================
--- lxml/trunk/src/lxml/schematron.pxi (original)
+++ lxml/trunk/src/lxml/schematron.pxi Thu Feb 21 17:19:58 2008
@@ -107,12 +107,17 @@
if parser_ctxt is NULL:
self._error_log.disconnect()
python.PyErr_NoMemory()
+ return
self._c_schema = schematron.xmlSchematronParse(parser_ctxt)
self._error_log.disconnect()
schematron.xmlSchematronFreeParserCtxt(parser_ctxt)
if self._c_schema is NULL:
+ if _LIBXML_VERSION_INT >= 20631:
+ # leak in older versions instead of just crashing
+ if c_doc is not NULL:
+ tree.xmlFreeDoc(c_doc)
raise SchematronParseError(
"Document is not a valid Schematron schema",
self._error_log)
From scoder at codespeak.net Thu Feb 21 17:20:02 2008
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Thu, 21 Feb 2008 17:20:02 +0100 (CET)
Subject: [Lxml-checkins] r51745 - in lxml/trunk: . src/lxml
Message-ID: <20080221162002.BA8F41684C1@codespeak.net>
Author: scoder
Date: Thu Feb 21 17:20:02 2008
New Revision: 51745
Modified:
lxml/trunk/ (props changed)
lxml/trunk/src/lxml/cstd.pxd
lxml/trunk/src/lxml/proxy.pxi
Log:
r3555 at delle: sbehnel | 2008-02-19 23:53:09 +0100
refactoring and cleanup of moveNodeToDocument: shorter and simpler code, better documented
Modified: lxml/trunk/src/lxml/cstd.pxd
==============================================================================
--- lxml/trunk/src/lxml/cstd.pxd (original)
+++ lxml/trunk/src/lxml/cstd.pxd Thu Feb 21 17:20:02 2008
@@ -17,6 +17,7 @@
cdef extern from "stdlib.h":
cdef void* malloc(size_t size) nogil
+ cdef void* realloc(void* ptr, size_t size) nogil
cdef void free(void* ptr) nogil
cdef extern from "stdarg.h":
Modified: lxml/trunk/src/lxml/proxy.pxi
==============================================================================
--- lxml/trunk/src/lxml/proxy.pxi (original)
+++ lxml/trunk/src/lxml/proxy.pxi Thu Feb 21 17:20:02 2008
@@ -203,17 +203,38 @@
Mainly copied from libxml2's xmlReconciliateNs(). Expects libxml2 doc
pointers of node to be correct already, but fixes _Document references.
+
+ For each node in the subtree, we do three things here:
+
+ 1) Remove redundant declarations of namespace that are already
+ defined in its parents.
+
+ 2) Replace namespaces that are *not* defined on the node or its
+ parents by the equivalent namespace declarations that *are*
+ defined on the node or its parents (possibly using a different
+ prefix). If a namespace is unknown, declare a new one on the
+ node.
+
+ 3) Set the Document reference to the new Document (if different).
+ This is done on backtracking to keep the original Document
+ alive as long as possible, until all its elements are updated.
+
+ Note that the namespace declarations are removed from the tree in
+ step 1), but freed only after the complete subtree was traversed
+ and all occurrences were replaced by tree-internal pointers.
"""
cdef _Element element
cdef xmlDoc* c_doc
cdef xmlNode* c_start_node
cdef xmlNode* c_node
+ cdef xmlNs** c_ns_ptr
cdef xmlNs** c_ns_new_cache
cdef xmlNs** c_ns_old_cache
cdef xmlNs* c_ns
+ cdef xmlNs* c_ns_next
+ cdef xmlNs* c_nsdef
cdef xmlNs* c_new_ns
cdef xmlNs* c_del_ns
- cdef xmlNs* c_last_del_ns
cdef cstd.size_t i, c_cache_size, c_cache_last
if not tree._isElementOrXInclude(c_element):
@@ -225,73 +246,67 @@
c_ns_old_cache = NULL
c_cache_size = 0
c_cache_last = 0
- c_del_ns = c_last_del_ns = NULL
+ c_del_ns = NULL
while c_element is not NULL:
- # remove namespaces defined here that are known in the new ancestors
- if c_element.nsDef is not NULL:
- while c_element.nsDef is not NULL:
+ # 1) cut out namespaces defined here that are already known by
+ # the ancestors
+ c_nsdef = c_element.nsDef
+ if c_nsdef is not NULL:
+ # start with second nsdef to keep c_element.nsDef for now
+ while c_nsdef.next is not NULL:
+ if c_nsdef.next is c_element.ns:
+ c_nsdef = c_nsdef.next
+ continue
c_ns = tree.xmlSearchNsByHref(
- c_element.doc, c_element.parent, c_element.nsDef.href)
+ c_element.doc, c_element.parent, c_nsdef.next.href)
if c_ns is NULL:
- break
- if c_del_ns is NULL:
- c_del_ns = c_last_del_ns = c_element.nsDef
- else:
- c_last_del_ns.next = c_element.nsDef
- c_last_del_ns = c_element.nsDef
- c_element.nsDef = c_element.nsDef.next
- if c_element.nsDef is not NULL:
- c_new_ns = c_element.nsDef
- while c_new_ns.next is not NULL:
- if c_new_ns.next is not c_element.ns:
- c_ns = tree.xmlSearchNsByHref(
- c_element.doc, c_element.parent, c_new_ns.next.href)
- if c_ns is not NULL:
- # already known or equal to a known definition
- if c_del_ns is NULL:
- c_del_ns = c_last_del_ns = c_new_ns.next
- else:
- c_last_del_ns.next = c_new_ns.next
- c_last_del_ns = c_new_ns.next
- c_new_ns.next = c_new_ns.next.next
- else:
- c_new_ns = c_new_ns.next
- else:
- c_new_ns = c_new_ns.next
+ c_nsdef = c_nsdef.next
+ continue
+ # cut out c_nsdef.next and prepend it to garbage chain
+ c_ns_next = c_nsdef.next.next
+ c_nsdef.next.next = c_del_ns
+ c_del_ns = c_nsdef.next
+ c_nsdef.next = c_ns_next
+ # now handle c_element.nsDef
+ c_ns = tree.xmlSearchNsByHref(
+ c_element.doc, c_element.parent, c_element.nsDef.href)
+ if c_ns is not NULL:
+ c_ns_next = c_element.nsDef.next
+ c_element.nsDef.next = c_del_ns
+ c_del_ns = c_element.nsDef
+ c_element.nsDef = c_ns_next
- # make sure the namespace of an element and its attributes is declared
- # in this document
+ # 2) make sure the namespace of an element and its attributes
+ # is declared in this document (i.e. the node or its parents)
c_node = c_element
while c_node is not NULL:
if c_node.ns is not NULL:
- c_ns = c_node.ns
for i from 0 <= i < c_cache_last:
- if c_ns is c_ns_old_cache[i]:
+ if c_node.ns is c_ns_old_cache[i]:
c_node.ns = c_ns_new_cache[i]
- c_ns = NULL
break
-
- if c_ns is not NULL:
- # not in cache, must find a replacement from this document
- c_new_ns = doc._findOrBuildNodeNs(c_element,
- c_ns.href, c_ns.prefix)
+ else:
+ # not in cache => find a replacement from this document
+ c_new_ns = doc._findOrBuildNodeNs(
+ c_element, c_node.ns.href, c_node.ns.prefix)
if c_cache_last >= c_cache_size:
# must resize cache
if c_cache_size == 0:
c_cache_size = 20
else:
c_cache_size *= 2
- c_ns_new_cache = python.PyMem_Realloc(
+ c_ns_ptr = cstd.realloc(
c_ns_new_cache, c_cache_size * sizeof(xmlNs*))
- if c_ns_new_cache is NULL:
- python.PyMem_Free(c_ns_old_cache)
- python.PyErr_NoMemory()
- return -1
- c_ns_old_cache = python.PyMem_Realloc(
- c_ns_old_cache, c_cache_size * sizeof(xmlNs*))
- if c_ns_old_cache is NULL:
- python.PyMem_Free(c_ns_new_cache)
+ if c_ns_ptr is not NULL:
+ c_ns_new_cache = c_ns_ptr
+ c_ns_ptr = cstd.realloc(
+ c_ns_old_cache, c_cache_size * sizeof(xmlNs*))
+ if c_ns_ptr is not NULL:
+ c_ns_old_cache = c_ns_ptr
+ else:
+ cstd.free(c_ns_new_cache)
+ cstd.free(c_ns_old_cache)
python.PyErr_NoMemory()
return -1
c_ns_new_cache[c_cache_last] = c_new_ns
@@ -313,7 +328,7 @@
if c_node is NULL:
# no children => back off and continue with siblings and parents
- # fix _Document reference (may dealloc the original document!)
+ # 3) fix _Document reference (may dealloc the original document!)
if c_element._private is not NULL:
element = <_Element>c_element._private
if element._doc is not doc:
@@ -323,7 +338,7 @@
element._gc_doc = doc
if c_element is c_start_node:
- break
+ break # all done
# continue with siblings
c_node = c_element.next
@@ -336,7 +351,7 @@
if c_element is NULL or not tree._isElementOrXInclude(c_element):
break
- # fix _Document reference (may dealloc the original document!)
+ # 3) fix _Document reference (may dealloc the original document!)
if c_element._private is not NULL:
element = <_Element>c_element._private
if element._doc is not doc:
@@ -353,18 +368,17 @@
not tree._isElementOrXInclude(c_node)):
c_node = c_node.next
if c_node is c_start_node:
- break
+ break # all done
c_element = c_node
# free now unused namespace declarations
if c_del_ns is not NULL:
- c_last_del_ns.next = NULL
tree.xmlFreeNsList(c_del_ns)
# cleanup
if c_ns_new_cache is not NULL:
- python.PyMem_Free(c_ns_new_cache)
+ cstd.free(c_ns_new_cache)
if c_ns_old_cache is not NULL:
- python.PyMem_Free(c_ns_old_cache)
+ cstd.free(c_ns_old_cache)
return 0
From scoder at codespeak.net Thu Feb 21 17:20:06 2008
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Thu, 21 Feb 2008 17:20:06 +0100 (CET)
Subject: [Lxml-checkins] r51746 - in lxml/trunk: . doc
Message-ID: <20080221162006.0356F1684E4@codespeak.net>
Author: scoder
Date: Thu Feb 21 17:20:06 2008
New Revision: 51746
Modified:
lxml/trunk/ (props changed)
lxml/trunk/doc/validation.txt
Log:
r3556 at delle: sbehnel | 2008-02-20 17:52:29 +0100
doc update
Modified: lxml/trunk/doc/validation.txt
==============================================================================
--- lxml/trunk/doc/validation.txt (original)
+++ lxml/trunk/doc/validation.txt Thu Feb 21 17:20:06 2008
@@ -195,10 +195,7 @@
RELAXNG_ERR_ELEMWRONG
Note that this error log is local to the RelaxNG object. It will only
-contain log entries that appeared during the validation. The
-DocumentInvalid exception raised by the ``assertValid`` method above
-provides access to the global error log (like all other lxml
-exceptions).
+contain log entries that appeared during the validation.
Similar to XSLT, there's also a less efficient but easier shortcut method to
do one-shot RelaxNG validation::
@@ -208,6 +205,13 @@
>>> doc2.relaxng(relaxng_doc)
False
+libxml2 does not currently support the `RelaxNG Compact Syntax`_.
+However, the trang_ translator can convert the compact syntax to the
+XML syntax, which can then be used with lxml.
+
+.. _`RelaxNG Compact Syntax`:
+.. _trang: http://www.thaiopensource.com/relaxng/trang.html
+
XMLSchema
---------
From scoder at codespeak.net Thu Feb 21 17:20:10 2008
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Thu, 21 Feb 2008 17:20:10 +0100 (CET)
Subject: [Lxml-checkins] r51747 - in lxml/trunk: . doc
Message-ID: <20080221162010.6B26B1684F6@codespeak.net>
Author: scoder
Date: Thu Feb 21 17:20:10 2008
New Revision: 51747
Modified:
lxml/trunk/ (props changed)
lxml/trunk/doc/objectify.txt
Log:
r3557 at delle: sbehnel | 2008-02-21 07:04:48 +0100
doc fix
Modified: lxml/trunk/doc/objectify.txt
==============================================================================
--- lxml/trunk/doc/objectify.txt (original)
+++ lxml/trunk/doc/objectify.txt Thu Feb 21 17:20:10 2008
@@ -1166,7 +1166,7 @@
.. _`namespace specific classes`: element_classes.html#namespace-class-lookup
-See the documentation on `class lookup` schemes for more information.
+See the documentation on `class lookup`_ schemes for more information.
What is different from lxml.etree?
From scoder at codespeak.net Thu Feb 21 17:20:14 2008
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Thu, 21 Feb 2008 17:20:14 +0100 (CET)
Subject: [Lxml-checkins] r51748 - in lxml/trunk: . src/lxml/tests
Message-ID: <20080221162014.522FE1684F7@codespeak.net>
Author: scoder
Date: Thu Feb 21 17:20:13 2008
New Revision: 51748
Modified:
lxml/trunk/ (props changed)
lxml/trunk/src/lxml/tests/test_dtd.py
Log:
r3558 at delle: sbehnel | 2008-02-21 07:34:42 +0100
more DTD validation tests
Modified: lxml/trunk/src/lxml/tests/test_dtd.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_dtd.py (original)
+++ lxml/trunk/src/lxml/tests/test_dtd.py Thu Feb 21 17:20:13 2008
@@ -33,6 +33,32 @@
self.assertRaises(etree.XMLSyntaxError,
fromstring, xml, parser=parser)
+ def test_dtd_parse_file_not_found(self):
+ fromstring = etree.fromstring
+ dtd_filename = fileInTestDir("__nosuch.dtd")
+ parser = etree.XMLParser(dtd_validation=True)
+ xml = '' % dtd_filename
+ self.assertRaises(etree.XMLSyntaxError,
+ fromstring, xml, parser=parser)
+ errors = None
+ try:
+ fromstring(xml, parser=parser)
+ except etree.XMLSyntaxError, e:
+ errors = [ entry.message for entry in e.error_log
+ if dtd_filename in entry.message ]
+ self.assert_(errors)
+
+ def test_dtd_parse_valid(self):
+ parser = etree.XMLParser(dtd_validation=True)
+ xml = '' % fileInTestDir("test.dtd")
+ root = etree.fromstring(xml, parser=parser)
+
+ def test_dtd_parse_valid_relative(self):
+ parser = etree.XMLParser(dtd_validation=True)
+ xml = ''
+ root = etree.fromstring(xml, parser=parser,
+ base_url=fileInTestDir("test.xml"))
+
def test_dtd_invalid(self):
root = etree.XML("")
dtd = etree.DTD(StringIO(""))
From lxml-checkins at codespeak.net Thu Feb 21 18:53:14 2008
From: lxml-checkins at codespeak.net (lxml-checkins at codespeak.net)
Date: Thu, 21 Feb 2008 18:53:14 +0100 (CET)
Subject: [Lxml-checkins] February 70% OFF
Message-ID: <20080221115227.3859.qmail@balzak.customer.top.net.ua>
An HTML attachment was scrubbed...
URL: http://codespeak.net/pipermail/lxml-checkins/attachments/20080221/8860466c/attachment.htm
From scoder at codespeak.net Thu Feb 21 19:10:42 2008
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Thu, 21 Feb 2008 19:10:42 +0100 (CET)
Subject: [Lxml-checkins] r51756 - lxml/trunk
Message-ID: <20080221181042.6E554168455@codespeak.net>
Author: scoder
Date: Thu Feb 21 19:10:40 2008
New Revision: 51756
Modified:
lxml/trunk/ (props changed)
lxml/trunk/version.txt
Log:
r3574 at delle: sbehnel | 2008-02-21 19:09:19 +0100
version
Modified: lxml/trunk/version.txt
==============================================================================
--- lxml/trunk/version.txt (original)
+++ lxml/trunk/version.txt Thu Feb 21 19:10:40 2008
@@ -1 +1 @@
-2.0.1
+2.0.2
From scoder at codespeak.net Thu Feb 21 19:10:45 2008
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Thu, 21 Feb 2008 19:10:45 +0100 (CET)
Subject: [Lxml-checkins] r51757 - in lxml/trunk: . src/lxml
Message-ID: <20080221181045.6AAEA168454@codespeak.net>
Author: scoder
Date: Thu Feb 21 19:10:44 2008
New Revision: 51757
Modified:
lxml/trunk/ (props changed)
lxml/trunk/src/lxml/classlookup.pxi
Log:
r3575 at delle: sbehnel | 2008-02-21 19:10:03 +0100
move C initialisation into __cinit__ rather than __init__
Modified: lxml/trunk/src/lxml/classlookup.pxi
==============================================================================
--- lxml/trunk/src/lxml/classlookup.pxi (original)
+++ lxml/trunk/src/lxml/classlookup.pxi Thu Feb 21 19:10:44 2008
@@ -58,7 +58,7 @@
Superclass of Element class lookups.
"""
cdef _element_class_lookup_function _lookup_function
- def __init__(self):
+ def __cinit__(self):
self._lookup_function = NULL # use default lookup
cdef public class FallbackElementClassLookup(ElementClassLookup) \
@@ -71,7 +71,6 @@
cdef readonly ElementClassLookup fallback
cdef _element_class_lookup_function _fallback_function
def __init__(self, ElementClassLookup fallback=None):
- self._lookup_function = NULL # use default lookup
if fallback is not None:
self._setFallback(fallback)
else:
From scoder at codespeak.net Fri Feb 22 08:17:40 2008
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Fri, 22 Feb 2008 08:17:40 +0100 (CET)
Subject: [Lxml-checkins] r51769 - in lxml/trunk: . src/lxml
Message-ID: <20080222071740.F0D6F16851F@codespeak.net>
Author: scoder
Date: Fri Feb 22 08:17:39 2008
New Revision: 51769
Modified:
lxml/trunk/ (props changed)
lxml/trunk/src/lxml/lxml.etree.pyx
Log:
r3579 at delle: sbehnel | 2008-02-22 07:57:16 +0100
default prefix for objectify's pytype
Modified: lxml/trunk/src/lxml/lxml.etree.pyx
==============================================================================
--- lxml/trunk/src/lxml/lxml.etree.pyx (original)
+++ lxml/trunk/src/lxml/lxml.etree.pyx Fri Feb 22 08:17:39 2008
@@ -91,6 +91,8 @@
"http://www.w3.org/2001/XMLSchema-instance": "xsi",
# dublin core
"http://purl.org/dc/elements/1.1/": "dc",
+ # objectify
+ "http://codespeak.net/lxml/objectify/pytype" : "py",
}
# Error superclass for ElementTree compatibility
From scoder at codespeak.net Fri Feb 22 08:17:46 2008
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Fri, 22 Feb 2008 08:17:46 +0100 (CET)
Subject: [Lxml-checkins] r51770 - in lxml/trunk: . doc src/lxml
src/lxml/tests
Message-ID: <20080222071746.2A3E9168520@codespeak.net>
Author: scoder
Date: Fri Feb 22 08:17:45 2008
New Revision: 51770
Modified:
lxml/trunk/ (props changed)
lxml/trunk/doc/objectify.txt
lxml/trunk/src/lxml/lxml.objectify.pyx
lxml/trunk/src/lxml/tests/test_objectify.py
Log:
r3580 at delle: sbehnel | 2008-02-22 08:15:04 +0100
cleanup, deprecation of setPytypeAttributeTag() -> set_pytype_attribute_tag()
Modified: lxml/trunk/doc/objectify.txt
==============================================================================
--- lxml/trunk/doc/objectify.txt (original)
+++ lxml/trunk/doc/objectify.txt Fri Feb 22 08:17:45 2008
@@ -817,10 +817,11 @@
>>> print root.c
None
-Note that you can change the name and namespace used for this attribute
-through the ``setPytypeAttributeTag(tag)`` module function, in case your
-application ever needs to. There is also a utility function ``annotate()``
-that recursively generates this attribute for the elements of a tree::
+Note that you can change the name and namespace used for this
+attribute through the ``set_pytype_attribute_tag(tag)`` module
+function, in case your application ever needs to. There is also a
+utility function ``annotate()`` that recursively generates this
+attribute for the elements of a tree::
>>> root = objectify.fromstring("test5")
>>> print objectify.dump(root)
Modified: lxml/trunk/src/lxml/lxml.objectify.pyx
==============================================================================
--- lxml/trunk/src/lxml/lxml.objectify.pyx (original)
+++ lxml/trunk/src/lxml/lxml.objectify.pyx Fri Feb 22 08:17:45 2008
@@ -58,8 +58,9 @@
cdef object TREE_PYTYPE_NAME
TREE_PYTYPE_NAME = "TREE"
-def setPytypeAttributeTag(attribute_tag=None):
- """Changes name and namespace of the XML attribute that holds Python type
+def set_pytype_attribute_tag(attribute_tag=None):
+ """set_pytype_attribute_tag(attribute_tag=None)
+ Change name and namespace of the XML attribute that holds Python type
information.
Reset by calling without argument.
@@ -78,7 +79,12 @@
PYTYPE_ATTRIBUTE = cetree.namespacedNameFromNsName(
_PYTYPE_NAMESPACE, _PYTYPE_ATTRIBUTE_NAME)
-setPytypeAttributeTag()
+def setPytypeAttributeTag(attribute_tag=None):
+ """:deprecated: use ``set_pytype_attribute_tag()`` instead.
+ """
+ set_pytype_attribute_tag(attribute_tag)
+
+set_pytype_attribute_tag()
# namespaces for XML Schema
Modified: lxml/trunk/src/lxml/tests/test_objectify.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_objectify.py (original)
+++ lxml/trunk/src/lxml/tests/test_objectify.py Fri Feb 22 08:17:45 2008
@@ -18,7 +18,7 @@
XML_SCHEMA_INSTANCE_TYPE_ATTR = "{%s}type" % XML_SCHEMA_INSTANCE_NS
XML_SCHEMA_NIL_ATTR = "{%s}nil" % XML_SCHEMA_INSTANCE_NS
TREE_PYTYPE = "TREE"
-DEFAULT_NSMAP = { "py" : PYTYPE_NAMESPACE,
+DEFAULT_NSMAP = { "py" : PYTYPE_NAMESPACE,
"xsi" : XML_SCHEMA_INSTANCE_NS,
"xsd" : XML_SCHEMA_NS}
@@ -86,7 +86,7 @@
def tearDown(self):
self.lookup.get_namespace("otherNS").clear()
- objectify.setPytypeAttributeTag()
+ objectify.set_pytype_attribute_tag()
del self.lookup
del self.parser
super(ObjectifyTestCase, self).tearDown()
@@ -1784,7 +1784,7 @@
'''
pytype_ns, pytype_name = objectify.PYTYPE_ATTRIBUTE[1:].split('}')
- objectify.setPytypeAttributeTag("{TEST}test")
+ objectify.set_pytype_attribute_tag("{TEST}test")
root = XML(xml)
objectify.annotate(root)
@@ -1796,7 +1796,7 @@
namespaces={"py" : "TEST"})
self.assertEquals(7, len(attribs))
- objectify.setPytypeAttributeTag()
+ objectify.set_pytype_attribute_tag()
pytype_ns, pytype_name = objectify.PYTYPE_ATTRIBUTE[1:].split('}')
self.assertNotEqual("test", pytype_ns.lower())
From scoder at codespeak.net Fri Feb 22 08:49:37 2008
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Fri, 22 Feb 2008 08:49:37 +0100 (CET)
Subject: [Lxml-checkins] r51771 - in lxml/trunk: . src/lxml/html
Message-ID: <20080222074937.1FDC7168518@codespeak.net>
Author: scoder
Date: Fri Feb 22 08:49:36 2008
New Revision: 51771
Modified:
lxml/trunk/ (props changed)
lxml/trunk/src/lxml/html/__init__.py
Log:
r3583 at delle: sbehnel | 2008-02-22 08:47:33 +0100
more doctests (and doc fixes) for lxml.html
Modified: lxml/trunk/src/lxml/html/__init__.py
==============================================================================
--- lxml/trunk/src/lxml/html/__init__.py (original)
+++ lxml/trunk/src/lxml/html/__init__.py Fri Feb 22 08:49:36 2008
@@ -120,7 +120,7 @@
Example::
>>> h = fragment_fromstring('
"""
@@ -713,11 +713,11 @@
You can use this like::
- >>> form = doc.forms[0]
- >>> form.inputs['foo'].value = 'bar' # etc
- >>> response = form.submit()
- >>> doc = parse(response)
- >>> doc.make_links_absolute(response.geturl())
+ >>> form = doc.forms[0] # doctest: +SKIP
+ >>> form.inputs['foo'].value = 'bar' # etc # doctest: +SKIP
+ >>> response = form.submit() # doctest: +SKIP
+ >>> doc = parse(response) # doctest: +SKIP
+ >>> doc.make_links_absolute(response.geturl()) # doctest: +SKIP
To change the HTTP requester, pass a function as ``open_http`` keyword
argument that opens the URL for you. The function must have the following
@@ -1273,20 +1273,42 @@
def tostring(doc, pretty_print=False, include_meta_content_type=False,
encoding=None, method="html"):
- """
- return HTML string representation of the document given
+ """Return an HTML string representation of the document.
- note: if include_meta_content_type is true this will create a meta
- http-equiv="Content-Type" tag in the head; regardless of the value of include_meta_content_type
- any existing meta http-equiv="Content-Type" tag will be removed
+ Note: if include_meta_content_type is true this will create a
+ ``<meta http-equiv="Content-Type" ...>`` tag in the head;
+ regardless of the value of include_meta_content_type any existing
+ ``<meta http-equiv="Content-Type" ...>`` tag will be removed.
+
+ The ``encoding`` argument controls the output encoding (defaults to
+ ASCII, with &#...; character references for any characters outside
+ of ASCII).
+
+ The ``method`` argument defines the output method. It defaults to
+ 'html', but can also be 'xml' for xhtml output, or 'text' to
+ serialise to plain text without markup. Note that you can pass
+ the builtin ``unicode`` type as ``encoding`` argument to serialise
+ to a unicode string.
+
+ Example::
+
+ >>> from lxml import html
+ >>> root = html.fragment_fromstring('<p>Hello<br>world!</p>')
+
+ >>> html.tostring(root)
+ '<p>Hello<br>world!</p>'
+ >>> html.tostring(root, method='html')
+ '<p>Hello<br>world!</p>'
+
+ >>> html.tostring(root, method='xml')
+ '<p>Hello<br/>world!</p>'
- encoding controls the output encoding (defauts to ASCII, with
- character references for any characters outside of ASCII)
+ >>> html.tostring(root, method='text')
+ 'Helloworld!'
- method, which defaults to 'html', can also be 'xml' for xhtml
- output.
+ >>> html.tostring(root, method='text', encoding=unicode)
+ u'Helloworld!'
"""
- assert doc is not None
html = etree.tostring(doc, method=method, pretty_print=pretty_print,
encoding=encoding)
if not include_meta_content_type:
From scoder at codespeak.net Fri Feb 22 08:49:42 2008
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Fri, 22 Feb 2008 08:49:42 +0100 (CET)
Subject: [Lxml-checkins] r51772 - in lxml/trunk: . doc
Message-ID: <20080222074942.DD8B916851D@codespeak.net>
Author: scoder
Date: Fri Feb 22 08:49:41 2008
New Revision: 51772
Modified:
lxml/trunk/ (props changed)
lxml/trunk/doc/objectify.txt
Log:
r3584 at delle: sbehnel | 2008-02-22 08:48:16 +0100
doctest fix after namespace prefix change
Modified: lxml/trunk/doc/objectify.txt
==============================================================================
--- lxml/trunk/doc/objectify.txt (original)
+++ lxml/trunk/doc/objectify.txt Fri Feb 22 08:49:41 2008
@@ -979,7 +979,7 @@
... nsmap={'foo': 'http://www.w3.org/2001/XMLSchema'})
>>> for prefix, namespace in el.nsmap.items():
... print prefix, '-', namespace
- ns0 - http://codespeak.net/lxml/objectify/pytype
+ py - http://codespeak.net/lxml/objectify/pytype
foo - http://www.w3.org/2001/XMLSchema
xsi - http://www.w3.org/2001/XMLSchema-instance
@@ -994,7 +994,7 @@
... 'myxsi': 'http://www.w3.org/2001/XMLSchema-instance'})
>>> for prefix, namespace in el.nsmap.items():
... print prefix, '-', namespace
- ns0 - http://codespeak.net/lxml/objectify/pytype
+ py - http://codespeak.net/lxml/objectify/pytype
foo - http://www.w3.org/2001/XMLSchema
myxsi - http://www.w3.org/2001/XMLSchema-instance
From scoder at codespeak.net Fri Feb 22 08:49:47 2008
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Fri, 22 Feb 2008 08:49:47 +0100 (CET)
Subject: [Lxml-checkins] r51773 - in lxml/trunk: . src/lxml/html/tests
Message-ID: <20080222074947.3163916851F@codespeak.net>
Author: scoder
Date: Fri Feb 22 08:49:46 2008
New Revision: 51773
Modified:
lxml/trunk/ (props changed)
lxml/trunk/src/lxml/html/tests/test_basic.py
Log:
r3585 at delle: sbehnel | 2008-02-22 08:48:34 +0100
enable doctests in lxml.html
Modified: lxml/trunk/src/lxml/html/tests/test_basic.py
==============================================================================
--- lxml/trunk/src/lxml/html/tests/test_basic.py (original)
+++ lxml/trunk/src/lxml/html/tests/test_basic.py Fri Feb 22 08:49:46 2008
@@ -1,10 +1,12 @@
import unittest, sys
from lxml.tests.common_imports import doctest
+import lxml.html
def test_suite():
suite = unittest.TestSuite()
if sys.version_info >= (2,4):
suite.addTests([doctest.DocFileSuite('test_basic.txt')])
+ suite.addTests([doctest.DocTestSuite(lxml.html)])
return suite
if __name__ == '__main__':
From scoder at codespeak.net Fri Feb 22 08:49:51 2008
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Fri, 22 Feb 2008 08:49:51 +0100 (CET)
Subject: [Lxml-checkins] r51774 - lxml/trunk
Message-ID: <20080222074951.86C3F168521@codespeak.net>
Author: scoder
Date: Fri Feb 22 08:49:50 2008
New Revision: 51774
Modified:
lxml/trunk/ (props changed)
lxml/trunk/CHANGES.txt
Log:
r3586 at delle: sbehnel | 2008-02-22 08:48:54 +0100
changelog
Modified: lxml/trunk/CHANGES.txt
==============================================================================
--- lxml/trunk/CHANGES.txt (original)
+++ lxml/trunk/CHANGES.txt Fri Feb 22 08:49:50 2008
@@ -14,6 +14,9 @@
Bugs fixed
----------
+* The prefix for objectify's pytype namespace was missing from the set
+ of default prefixes.
+
* Memory leak in Schematron (fixed only for libxml2 2.6.31+).
* Error type names in RelaxNG were reported incorrectly.
@@ -23,6 +26,8 @@
Other changes
-------------
+* Enabled doctests for some Python modules (especially ``lxml.html``).
+
* Add a ``method`` argument to ``lxml.html.tostring``
(``method="xml"`` for XHTML output).
From scoder at codespeak.net Fri Feb 22 09:53:59 2008
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Fri, 22 Feb 2008 09:53:59 +0100 (CET)
Subject: [Lxml-checkins] r51775 - lxml/trunk
Message-ID: <20080222085359.B42D01684F9@codespeak.net>
Author: scoder
Date: Fri Feb 22 09:53:57 2008
New Revision: 51775
Modified:
lxml/trunk/ (props changed)
lxml/trunk/CREDITS.txt
Log:
r3591 at delle: sbehnel | 2008-02-22 09:43:28 +0100
credits update
Modified: lxml/trunk/CREDITS.txt
==============================================================================
--- lxml/trunk/CREDITS.txt (original)
+++ lxml/trunk/CREDITS.txt Fri Feb 22 09:53:57 2008
@@ -1,14 +1,17 @@
+=======
Credits
--------
+=======
Stefan Behnel - main developer and maintainer
Martijn Faassen - creator of lxml and initial main developer
-Ian Bicking - lxml.html
+Ian Bicking - creator and maintainer of lxml.html
Holger Joukl - bug reports, feedback and development on lxml.objectify
+Sidnei da Silva - official MS Windows builds
+
Marc-Antoine Parent - XPath extension function help and patches
Olivier Grisel - improved (c)ElementTree compatibility patches,
@@ -24,51 +27,35 @@
Paul Everitt - bug reporting, feedback on API design
-Paul Clifford - Python 2.2 compatibility fixes
-
Victor Ng - Discussions on memory management strategies, vlibxml2
Robert Kern - feedback on API design
-Trent Mick - setup.py patch
-
Andreas Pakulat - rpath linking support, doc improvements
Steve Howe - Windows builds
David Sankel - building statically on Windows
-Noah Slater - bug squeezing
-
-Duncan Booth - bugfixing
-
-Dean Pavlekovic - bug reporting
-
-Julien Anguenot - bug reporting
-
-Wade Leftwich - unicode bug reporting
-
-Kieran Holland - iteration crash bug report
-
-Henrik Thostrup Jensen - bug reporting
-
-dharana - bug reporting
-
-Hamish Lawson - bug reporting
-Gavrie Philipson - bug reporting
+... and lots of other people who contributed to lxml by reporting
+bugs, discussing its functionality or blaming the docs for the bugs in
+their code. Thank you all, user feedback and discussions form a very
+important part of an Open Source project!
-Thanks also to:
----------------
+Special thanks go to:
+=====================
-* the libxml2 project for a great XML library.
+* the libxml2 project and especially Daniel Veillard for a great XML
+ library.
* Fredrik Lundh for the ElementTree API.
-* pyrex for the binding technology.
+* Greg Ewing (Pyrex) and Robert Bradshaw (Cython) for the binding
+ technology.
* the codespeak crew, in particular Philipp von Weitershausen and
- Holger Krekel for hosting it on codespeak.net
+ Holger Krekel for hosting lxml on codespeak.net
* Infrae for initiating the project.
From scoder at codespeak.net Fri Feb 22 09:54:03 2008
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Fri, 22 Feb 2008 09:54:03 +0100 (CET)
Subject: [Lxml-checkins] r51776 - in lxml/trunk: . doc
Message-ID: <20080222085403.C2255168426@codespeak.net>
Author: scoder
Date: Fri Feb 22 09:54:02 2008
New Revision: 51776
Modified:
lxml/trunk/ (props changed)
lxml/trunk/doc/lxml-source-howto.txt
Log:
r3592 at delle: sbehnel | 2008-02-22 09:53:21 +0100
source howto
Modified: lxml/trunk/doc/lxml-source-howto.txt
==============================================================================
--- lxml/trunk/doc/lxml-source-howto.txt (original)
+++ lxml/trunk/doc/lxml-source-howto.txt Fri Feb 22 09:54:02 2008
@@ -86,9 +86,33 @@
Concepts
--------
-* proxies
-* naming conventions
-* ...
+lxml's tree API is based on proxy objects. That means, every Element
+object (or rather ``_Element`` object) is a proxy for a libxml2 node
+structure. The class declaration is (mainly)::
+
+ cdef class _Element:
+ cdef _Document _doc
+ cdef xmlNode* _c_node
+
+It is a naming convention that C variables and C level class members
+that are passed into libxml2 start with a prefixed ``c_`` (commonly
+libxml2 struct pointers), and that C level class members are prefixed
+with an underscore. So you will often see names like ``c_doc`` for an
+``xmlDoc*`` variable (or ``c_node`` for an ``xmlNode*``), or the above
+``_c_node`` for a class member that points to an ``xmlNode`` struct
+(or ``_c_doc`` for an ``xmlDoc*``).
+
+It is important to know that every proxy in lxml has a factory
+function that properly sets up C level members. Proxy objects must
+*never* be instantiated outside of that factory. For example, to
+instantiate an _Element object or its subclasses, you must always call
+its factory function::
+
+ cdef xmlNode* c_node
+ cdef _Document doc
+ cdef _Element element
+ ...
+ element = _elementFactory(doc, c_node)
The documentation
@@ -127,9 +151,10 @@
==========
The main module, ``lxml.etree``, is in the file **lxml.etree.pyx**.
-It implements the main functions and types of the ElementTree API, and
-is therefore the best place to start if you want to find out how a
-specific feature is implemented.
+It implements the main functions and types of the ElementTree API, as
+well as all the factory functions for proxies. It is the best place
+to start if you want to find out how a specific feature is
+implemented.
At the very end of the file, it contains a series of ``include``
statements that merge the rest of the implementation into the
@@ -140,13 +165,14 @@
The main include files are:
apihelpers.pxi
- Private C helper functions. Most of the little functions that are
- used all over the place are defined here. This includes things
- like reading out the text content of a libxml2 tree node, checking
- input from the API level, creating a new Element node or handling
- attribute values. If you want to work on the lxml code, you
- should keep these functions in the back of your head, as they will
- definitely make your life easier.
+ Private C helper functions. Except for the factory functions,
+ most of the little functions that are used all over the place are
+ defined here. This includes things like reading out the text
+ content of a libxml2 tree node, checking input from the API level,
+ creating a new Element node or handling attribute values. If you
+ want to work on the lxml code, you should keep these functions in
+ the back of your head, as they will definitely make your life
+ easier.
classlookup.pxi
Element class lookup mechanisms. The main API and engines for
@@ -241,28 +267,41 @@
called ``CSSSelector``.
doctestcompare.py
- ...
+ A relaxed comparison scheme for XML/HTML markup in doctest.
ElementInclude.py
- ...
+ XInclude-like document inclusion, compatible with ElementTree.
_elementpath.py
- ...
+ XPath-like path language, compatible with ElementTree.
sax.py
- ...
+ SAX2 compatible interfaces to copy lxml trees from/to SAX compatible
+ tools.
usedoctest.py
- ...
+ Wrapper module for ``doctestcompare.py`` that simplifies its usage
+ from inside a doctest.
lxml.objectify
==============
-* ...
+A Cython implemented extension module that uses the public C-API of
+lxml.etree. It provides a Python object-like interface to XML trees.
+
+
+lxml.pyclasslookup
+==================
+
+A Cython implemented extension module that uses the public C-API of
+lxml.etree. It provides a class lookup scheme that duplicates lxml's
+ElementTree API in a very simple way to provide Python access to the
+tree *before* instantiating the real Python proxies in lxml.etree.
lxml.html
=========
-* ...
+A specialised toolkit for HTML handling, based on lxml.etree. This is
+implemented in pure Python.
From scoder at codespeak.net Fri Feb 22 09:55:51 2008
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Fri, 22 Feb 2008 09:55:51 +0100 (CET)
Subject: [Lxml-checkins] r51777 - in lxml/branch/lxml-2.0: . doc src/lxml
src/lxml/html src/lxml/html/tests src/lxml/tests
Message-ID: <20080222085551.C3A87168426@codespeak.net>
Author: scoder
Date: Fri Feb 22 09:55:49 2008
New Revision: 51777
Modified:
lxml/branch/lxml-2.0/CHANGES.txt
lxml/branch/lxml-2.0/CREDITS.txt
lxml/branch/lxml-2.0/Makefile
lxml/branch/lxml-2.0/TODO.txt
lxml/branch/lxml-2.0/doc/lxml-source-howto.txt
lxml/branch/lxml-2.0/doc/objectify.txt
lxml/branch/lxml-2.0/doc/validation.txt
lxml/branch/lxml-2.0/src/lxml/apihelpers.pxi
lxml/branch/lxml-2.0/src/lxml/classlookup.pxi
lxml/branch/lxml-2.0/src/lxml/cstd.pxd
lxml/branch/lxml-2.0/src/lxml/docloader.pxi
lxml/branch/lxml-2.0/src/lxml/html/__init__.py
lxml/branch/lxml-2.0/src/lxml/html/tests/test_basic.py
lxml/branch/lxml-2.0/src/lxml/iterparse.pxi
lxml/branch/lxml-2.0/src/lxml/lxml.etree.pyx
lxml/branch/lxml-2.0/src/lxml/lxml.objectify.pyx
lxml/branch/lxml-2.0/src/lxml/parser.pxi
lxml/branch/lxml-2.0/src/lxml/proxy.pxi
lxml/branch/lxml-2.0/src/lxml/relaxng.pxi
lxml/branch/lxml-2.0/src/lxml/schematron.pxi
lxml/branch/lxml-2.0/src/lxml/tests/test_css.py
lxml/branch/lxml-2.0/src/lxml/tests/test_dtd.py
lxml/branch/lxml-2.0/src/lxml/tests/test_elementtree.py
lxml/branch/lxml-2.0/src/lxml/tests/test_errors.py
lxml/branch/lxml-2.0/src/lxml/tests/test_etree.py
lxml/branch/lxml-2.0/src/lxml/tests/test_io.py
lxml/branch/lxml-2.0/src/lxml/tests/test_objectify.py
lxml/branch/lxml-2.0/src/lxml/tests/test_unicode.py
lxml/branch/lxml-2.0/src/lxml/tree.pxd
lxml/branch/lxml-2.0/src/lxml/xmlid.pxi
lxml/branch/lxml-2.0/src/lxml/xmlparser.pxd
lxml/branch/lxml-2.0/src/lxml/xmlschema.pxi
lxml/branch/lxml-2.0/src/lxml/xslt.pxi
lxml/branch/lxml-2.0/version.txt
Log:
trunk merge
Modified: lxml/branch/lxml-2.0/CHANGES.txt
==============================================================================
--- lxml/branch/lxml-2.0/CHANGES.txt (original)
+++ lxml/branch/lxml-2.0/CHANGES.txt Fri Feb 22 09:55:49 2008
@@ -8,9 +8,17 @@
Features added
--------------
+* Support passing ``base_url`` to file parser functions to override
+ the filename of the file(-like) object.
+
Bugs fixed
----------
+* The prefix for objectify's pytype namespace was missing from the set
+ of default prefixes.
+
+* Memory leak in Schematron (fixed only for libxml2 2.6.31+).
+
* Error type names in RelaxNG were reported incorrectly.
* Slice deletion bug fixed in objectify.
@@ -18,6 +26,14 @@
Other changes
-------------
+* Enabled doctests for some Python modules (especially ``lxml.html``).
+
+* Add a ``method`` argument to ``lxml.html.tostring``
+ (``method="xml"`` for XHTML output).
+
+* Make it clearer that methods like ``lxml.html.fromstring`` take a
+ ``base_url`` argument.
+
2.0.1 (2008-02-13)
==================
Modified: lxml/branch/lxml-2.0/CREDITS.txt
==============================================================================
--- lxml/branch/lxml-2.0/CREDITS.txt (original)
+++ lxml/branch/lxml-2.0/CREDITS.txt Fri Feb 22 09:55:49 2008
@@ -1,14 +1,17 @@
+=======
Credits
--------
+=======
Stefan Behnel - main developer and maintainer
Martijn Faassen - creator of lxml and initial main developer
-Ian Bicking - lxml.html
+Ian Bicking - creator and maintainer of lxml.html
Holger Joukl - bug reports, feedback and development on lxml.objectify
+Sidnei da Silva - official MS Windows builds
+
Marc-Antoine Parent - XPath extension function help and patches
Olivier Grisel - improved (c)ElementTree compatibility patches,
@@ -24,51 +27,35 @@
Paul Everitt - bug reporting, feedback on API design
-Paul Clifford - Python 2.2 compatibility fixes
-
Victor Ng - Discussions on memory management strategies, vlibxml2
Robert Kern - feedback on API design
-Trent Mick - setup.py patch
-
Andreas Pakulat - rpath linking support, doc improvements
Steve Howe - Windows builds
David Sankel - building statically on Windows
-Noah Slater - bug squeezing
-
-Duncan Booth - bugfixing
-
-Dean Pavlekovic - bug reporting
-
-Julien Anguenot - bug reporting
-
-Wade Leftwich - unicode bug reporting
-
-Kieran Holland - iteration crash bug report
-
-Henrik Thostrup Jensen - bug reporting
-
-dharana - bug reporting
-
-Hamish Lawson - bug reporting
-Gavrie Philipson - bug reporting
+... and lots of other people who contributed to lxml by reporting
+bugs, discussing its functionality or blaming the docs for the bugs in
+their code. Thank you all, user feedback and discussions form a very
+important part of an Open Source project!
-Thanks also to:
----------------
+Special thanks go to:
+=====================
-* the libxml2 project for a great XML library.
+* the libxml2 project and especially Daniel Veillard for a great XML
+ library.
* Fredrik Lundh for the ElementTree API.
-* pyrex for the binding technology.
+* Greg Ewing (Pyrex) and Robert Bradshaw (Cython) for the binding
+ technology.
* the codespeak crew, in particular Philipp von Weitershausen and
- Holger Krekel for hosting it on codespeak.net
+ Holger Krekel for hosting lxml on codespeak.net
* Infrae for initiating the project.
Modified: lxml/branch/lxml-2.0/Makefile
==============================================================================
--- lxml/branch/lxml-2.0/Makefile (original)
+++ lxml/branch/lxml-2.0/Makefile Fri Feb 22 09:55:49 2008
@@ -24,6 +24,10 @@
valgrind --tool=memcheck --leak-check=full --num-callers=30 --suppressions=valgrind-python.supp \
$(PYTHON) test.py
+gdb_test_inplace: inplace
+ @echo -e "file $(PYTHON)\nrun test.py" > .gdb.command
+ gdb -x .gdb.command -d src -d src/lxml
+
bench_inplace: inplace
$(PYTHON) benchmark/bench_etree.py -i
$(PYTHON) benchmark/bench_xpath.py -i
@@ -51,6 +55,8 @@
valtest: valgrind_test_inplace
+gdbtest: gdb_test_inplace
+
bench: bench_inplace
ftest: ftest_inplace
Modified: lxml/branch/lxml-2.0/TODO.txt
==============================================================================
--- lxml/branch/lxml-2.0/TODO.txt (original)
+++ lxml/branch/lxml-2.0/TODO.txt Fri Feb 22 09:55:49 2008
@@ -21,6 +21,9 @@
* better exception messages for XPath and schemas based on error log,
e.g. missing namespace mappings in XPath
+* more testing on input/output of encoded filenames, including custom
+ resolvers, relative XSLT imports, ...
+
QName
-----
Modified: lxml/branch/lxml-2.0/doc/lxml-source-howto.txt
==============================================================================
--- lxml/branch/lxml-2.0/doc/lxml-source-howto.txt (original)
+++ lxml/branch/lxml-2.0/doc/lxml-source-howto.txt Fri Feb 22 09:55:49 2008
@@ -86,9 +86,33 @@
Concepts
--------
-* proxies
-* naming conventions
-* ...
+lxml's tree API is based on proxy objects. That means, every Element
+object (or rather ``_Element`` object) is a proxy for a libxml2 node
+structure. The class declaration is (mainly)::
+
+ cdef class _Element:
+ cdef _Document _doc
+ cdef xmlNode* _c_node
+
+It is a naming convention that C variables and C level class members
+that are passed into libxml2 start with a prefixed ``c_`` (commonly
+libxml2 struct pointers), and that C level class members are prefixed
+with an underscore. So you will often see names like ``c_doc`` for an
+``xmlDoc*`` variable (or ``c_node`` for an ``xmlNode*``), or the above
+``_c_node`` for a class member that points to an ``xmlNode`` struct
+(or ``_c_doc`` for an ``xmlDoc*``).
+
+It is important to know that every proxy in lxml has a factory
+function that properly sets up C level members. Proxy objects must
+*never* be instantiated outside of that factory. For example, to
+instantiate an _Element object or its subclasses, you must always call
+its factory function::
+
+ cdef xmlNode* c_node
+ cdef _Document doc
+ cdef _Element element
+ ...
+ element = _elementFactory(doc, c_node)
The documentation
@@ -127,9 +151,10 @@
==========
The main module, ``lxml.etree``, is in the file **lxml.etree.pyx**.
-It implements the main functions and types of the ElementTree API, and
-is therefore the best place to start if you want to find out how a
-specific feature is implemented.
+It implements the main functions and types of the ElementTree API, as
+well as all the factory functions for proxies. It is the best place
+to start if you want to find out how a specific feature is
+implemented.
At the very end of the file, it contains a series of ``include``
statements that merge the rest of the implementation into the
@@ -140,13 +165,14 @@
The main include files are:
apihelpers.pxi
- Private C helper functions. Most of the little functions that are
- used all over the place are defined here. This includes things
- like reading out the text content of a libxml2 tree node, checking
- input from the API level, creating a new Element node or handling
- attribute values. If you want to work on the lxml code, you
- should keep these functions in the back of your head, as they will
- definitely make your life easier.
+ Private C helper functions. Except for the factory functions,
+ most of the little functions that are used all over the place are
+ defined here. This includes things like reading out the text
+ content of a libxml2 tree node, checking input from the API level,
+ creating a new Element node or handling attribute values. If you
+ want to work on the lxml code, you should keep these functions in
+ the back of your head, as they will definitely make your life
+ easier.
classlookup.pxi
Element class lookup mechanisms. The main API and engines for
@@ -241,28 +267,41 @@
called ``CSSSelector``.
doctestcompare.py
- ...
+ A relaxed comparison scheme for XML/HTML markup in doctest.
ElementInclude.py
- ...
+ XInclude-like document inclusion, compatible with ElementTree.
_elementpath.py
- ...
+ XPath-like path language, compatible with ElementTree.
sax.py
- ...
+ SAX2 compatible interfaces to copy lxml trees from/to SAX compatible
+ tools.
usedoctest.py
- ...
+ Wrapper module for ``doctestcompare.py`` that simplifies its usage
+ from inside a doctest.
lxml.objectify
==============
-* ...
+A Cython implemented extension module that uses the public C-API of
+lxml.etree. It provides a Python object-like interface to XML trees.
+
+
+lxml.pyclasslookup
+==================
+
+A Cython implemented extension module that uses the public C-API of
+lxml.etree. It provides a class lookup scheme that duplicates lxml's
+ElementTree API in a very simple way to provide Python access to the
+tree *before* instantiating the real Python proxies in lxml.etree.
lxml.html
=========
-* ...
+A specialised toolkit for HTML handling, based on lxml.etree. This is
+implemented in pure Python.
Modified: lxml/branch/lxml-2.0/doc/objectify.txt
==============================================================================
--- lxml/branch/lxml-2.0/doc/objectify.txt (original)
+++ lxml/branch/lxml-2.0/doc/objectify.txt Fri Feb 22 09:55:49 2008
@@ -817,10 +817,11 @@
>>> print root.c
None
-Note that you can change the name and namespace used for this attribute
-through the ``setPytypeAttributeTag(tag)`` module function, in case your
-application ever needs to. There is also a utility function ``annotate()``
-that recursively generates this attribute for the elements of a tree::
+Note that you can change the name and namespace used for this
+attribute through the ``set_pytype_attribute_tag(tag)`` module
+function, in case your application ever needs to. There is also a
+utility function ``annotate()`` that recursively generates this
+attribute for the elements of a tree::
>>> root = objectify.fromstring("test5")
>>> print objectify.dump(root)
@@ -978,7 +979,7 @@
... nsmap={'foo': 'http://www.w3.org/2001/XMLSchema'})
>>> for prefix, namespace in el.nsmap.items():
... print prefix, '-', namespace
- ns0 - http://codespeak.net/lxml/objectify/pytype
+ py - http://codespeak.net/lxml/objectify/pytype
foo - http://www.w3.org/2001/XMLSchema
xsi - http://www.w3.org/2001/XMLSchema-instance
@@ -993,7 +994,7 @@
... 'myxsi': 'http://www.w3.org/2001/XMLSchema-instance'})
>>> for prefix, namespace in el.nsmap.items():
... print prefix, '-', namespace
- ns0 - http://codespeak.net/lxml/objectify/pytype
+ py - http://codespeak.net/lxml/objectify/pytype
foo - http://www.w3.org/2001/XMLSchema
myxsi - http://www.w3.org/2001/XMLSchema-instance
@@ -1166,7 +1167,7 @@
.. _`namespace specific classes`: element_classes.html#namespace-class-lookup
-See the documentation on `class lookup` schemes for more information.
+See the documentation on `class lookup`_ schemes for more information.
What is different from lxml.etree?
Modified: lxml/branch/lxml-2.0/doc/validation.txt
==============================================================================
--- lxml/branch/lxml-2.0/doc/validation.txt (original)
+++ lxml/branch/lxml-2.0/doc/validation.txt Fri Feb 22 09:55:49 2008
@@ -195,10 +195,7 @@
RELAXNG_ERR_ELEMWRONG
Note that this error log is local to the RelaxNG object. It will only
-contain log entries that appeared during the validation. The
-DocumentInvalid exception raised by the ``assertValid`` method above
-provides access to the global error log (like all other lxml
-exceptions).
+contain log entries that appeared during the validation.
Similar to XSLT, there's also a less efficient but easier shortcut method to
do one-shot RelaxNG validation::
@@ -208,6 +205,13 @@
>>> doc2.relaxng(relaxng_doc)
False
+libxml2 does not currently support the `RelaxNG Compact Syntax`_.
+However, the trang_ translator can convert the compact syntax to the
+XML syntax, which can then be used with lxml.
+
+.. _`RelaxNG Compact Syntax`: http://relaxng.org/compact-tutorial.html
+.. _trang: http://www.thaiopensource.com/relaxng/trang.html
+
XMLSchema
---------
Modified: lxml/branch/lxml-2.0/src/lxml/apihelpers.pxi
==============================================================================
--- lxml/branch/lxml-2.0/src/lxml/apihelpers.pxi (original)
+++ lxml/branch/lxml-2.0/src/lxml/apihelpers.pxi Fri Feb 22 09:55:49 2008
@@ -915,23 +915,25 @@
return -1 # invalid!
elif is_non_ascii == 0 and not tree.xmlIsChar_ch(c):
return -1 # invalid!
- s = s + 1
+ s += 1
return is_non_ascii
cdef object funicode(char* s):
cdef Py_ssize_t slen
cdef char* spos
- cdef char c
+ cdef bint is_non_ascii
spos = s
- c = spos[0]
- while c != c'\0':
- if c & 0x80:
+ is_non_ascii = 0
+ while spos[0] != c'\0':
+ if spos[0] & 0x80:
+ is_non_ascii = 1
break
- spos = spos + 1
- c = spos[0]
+ spos += 1
+ while spos[0] != c'\0':
+ spos += 1
slen = spos - s
- if c != c'\0':
- return python.PyUnicode_DecodeUTF8(s, slen+cstd.strlen(spos), NULL)
+ if is_non_ascii:
+ return python.PyUnicode_DecodeUTF8(s, slen, NULL)
return python.PyString_FromStringAndSize(s, slen)
cdef object _utf8(object s):
@@ -948,6 +950,8 @@
return s
cdef object _encodeFilename(object filename):
+ """Make sure a filename is 8-bit encoded (or None).
+ """
if filename is None:
return None
elif python.PyString_Check(filename):
Modified: lxml/branch/lxml-2.0/src/lxml/classlookup.pxi
==============================================================================
--- lxml/branch/lxml-2.0/src/lxml/classlookup.pxi (original)
+++ lxml/branch/lxml-2.0/src/lxml/classlookup.pxi Fri Feb 22 09:55:49 2008
@@ -58,7 +58,7 @@
Superclass of Element class lookups.
"""
cdef _element_class_lookup_function _lookup_function
- def __init__(self):
+ def __cinit__(self):
self._lookup_function = NULL # use default lookup
cdef public class FallbackElementClassLookup(ElementClassLookup) \
@@ -71,7 +71,6 @@
cdef readonly ElementClassLookup fallback
cdef _element_class_lookup_function _fallback_function
def __init__(self, ElementClassLookup fallback=None):
- self._lookup_function = NULL # use default lookup
if fallback is not None:
self._setFallback(fallback)
else:
Modified: lxml/branch/lxml-2.0/src/lxml/cstd.pxd
==============================================================================
--- lxml/branch/lxml-2.0/src/lxml/cstd.pxd (original)
+++ lxml/branch/lxml-2.0/src/lxml/cstd.pxd Fri Feb 22 09:55:49 2008
@@ -17,6 +17,7 @@
cdef extern from "stdlib.h":
cdef void* malloc(size_t size) nogil
+ cdef void* realloc(void* ptr, size_t size) nogil
cdef void free(void* ptr) nogil
cdef extern from "stdarg.h":
Modified: lxml/branch/lxml-2.0/src/lxml/docloader.pxi
==============================================================================
--- lxml/branch/lxml-2.0/src/lxml/docloader.pxi (original)
+++ lxml/branch/lxml-2.0/src/lxml/docloader.pxi Fri Feb 22 09:55:49 2008
@@ -67,8 +67,8 @@
doc_ref._filename = _encodeFilename(filename)
return doc_ref
- def resolve_file(self, f, context):
- """resolve_file(self, f, context)
+ def resolve_file(self, f, context, *, base_url=None):
+ """resolve_file(self, f, context, base_url=None)
Return an open file-like object as input document.
@@ -81,7 +81,10 @@
raise TypeError("Argument is not a file-like object")
doc_ref = _InputDocument()
doc_ref._type = PARSER_DATA_FILE
- doc_ref._filename = _getFilenameForFile(f)
+ if base_url is not None:
+ doc_ref._filename = _encodeFilename(base_url)
+ else:
+ doc_ref._filename = _getFilenameForFile(f)
doc_ref._file = f
return doc_ref
Modified: lxml/branch/lxml-2.0/src/lxml/html/__init__.py
==============================================================================
--- lxml/branch/lxml-2.0/src/lxml/html/__init__.py (original)
+++ lxml/branch/lxml-2.0/src/lxml/html/__init__.py Fri Feb 22 09:55:49 2008
@@ -67,7 +67,7 @@
return self.xpath('//head')[0]
head = property(head, doc=head.__doc__)
- def label__get(self):
+ def _label__get(self):
"""
Get or set any