From ianb at codespeak.net Fri Feb 1 02:38:31 2008 From: ianb at codespeak.net (ianb at codespeak.net) Date: Fri, 1 Feb 2008 02:38:31 +0100 (CET) Subject: [Lxml-checkins] r51162 - lxml/trunk/src/lxml/html Message-ID: <20080201013831.C61DD1684D6@codespeak.net> Author: ianb Date: Fri Feb 1 02:38:30 2008 New Revision: 51162 Added: lxml/trunk/src/lxml/html/_dictmixin.py (contents, props changed) Log: Add DictMixin backport Added: lxml/trunk/src/lxml/html/_dictmixin.py ============================================================================== --- (empty file) +++ lxml/trunk/src/lxml/html/_dictmixin.py Fri Feb 1 02:38:30 2008 @@ -0,0 +1,102 @@ +""" +A backport of UserDict.DictMixin for pre-python-2.4 +""" +__all__ = ['DictMixin'] + +try: + from UserDict import DictMixin +except ImportError: + class DictMixin: + # Mixin defining all dictionary methods for classes that already have + # a minimum dictionary interface including getitem, setitem, delitem, + # and keys. Without knowledge of the subclass constructor, the mixin + # does not define __init__() or copy(). In addition to the four base + # methods, progressively more efficiency comes with defining + # __contains__(), __iter__(), and iteritems(). 
+ + # second level definitions support higher levels + def __iter__(self): + for k in self.keys(): + yield k + def has_key(self, key): + try: + value = self[key] + except KeyError: + return False + return True + def __contains__(self, key): + return self.has_key(key) + + # third level takes advantage of second level definitions + def iteritems(self): + for k in self: + yield (k, self[k]) + def iterkeys(self): + return self.__iter__() + + # fourth level uses definitions from lower levels + def itervalues(self): + for _, v in self.iteritems(): + yield v + def values(self): + return [v for _, v in self.iteritems()] + def items(self): + return list(self.iteritems()) + def clear(self): + for key in self.keys(): + del self[key] + def setdefault(self, key, default=None): + try: + return self[key] + except KeyError: + self[key] = default + return default + def pop(self, key, *args): + if len(args) > 1: + raise TypeError, "pop expected at most 2 arguments, got "\ + + repr(1 + len(args)) + try: + value = self[key] + except KeyError: + if args: + return args[0] + raise + del self[key] + return value + def popitem(self): + try: + k, v = self.iteritems().next() + except StopIteration: + raise KeyError, 'container is empty' + del self[k] + return (k, v) + def update(self, other=None, **kwargs): + # Make progressively weaker assumptions about "other" + if other is None: + pass + elif hasattr(other, 'iteritems'): # iteritems saves memory and lookups + for k, v in other.iteritems(): + self[k] = v + elif hasattr(other, 'keys'): + for k in other.keys(): + self[k] = other[k] + else: + for k, v in other: + self[k] = v + if kwargs: + self.update(kwargs) + def get(self, key, default=None): + try: + return self[key] + except KeyError: + return default + def __repr__(self): + return repr(dict(self.iteritems())) + def __cmp__(self, other): + if other is None: + return 1 + if isinstance(other, DictMixin): + other = dict(other.iteritems()) + return cmp(dict(self.iteritems()), other) + def 
__len__(self): + return len(self.keys()) From scoder at codespeak.net Fri Feb 1 12:34:14 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 1 Feb 2008 12:34:14 +0100 (CET) Subject: [Lxml-checkins] r51168 - in lxml/trunk: . doc Message-ID: <20080201113414.C2E6E1684CB@codespeak.net> Author: scoder Date: Fri Feb 1 12:34:13 2008 New Revision: 51168 Modified: lxml/trunk/ (props changed) lxml/trunk/doc/FAQ.txt Log: r3373 at delle: sbehnel | 2008-02-01 07:40:54 +0100 separate out Zope in users list Modified: lxml/trunk/doc/FAQ.txt ============================================================================== --- lxml/trunk/doc/FAQ.txt (original) +++ lxml/trunk/doc/FAQ.txt Fri Feb 1 12:34:13 2008 @@ -120,15 +120,19 @@ * cssutils_, a CSS parser and toolkit, can be used with ``lxml.cssselect`` * Deliverance_, a content theming tool -* gocept.lxml_, Zope3 interface bindings for lxml * Inteproxy_, a secure HTTP proxy * lwebstring_, an XML template engine * OpenXMLlib_, a library for handling OpenXML document meta data * Pycoon_, a WSGI web development framework based on XML pipelines * rfadict_, an RDFa parser with a simple dictionary-like interface. -And a couple of generally happy_ users_, and other `sites that link to -lxml`_. +Zope3 and some of its extensions have good support for lxml: + +* gocept.lxml_, Zope3 interface bindings for lxml +* z3c.rml_, an implementation of ReportLab's RML format + +And don't miss the quotes by our generally happy_ users_, and other +`sites that link to lxml`_. .. _cssutils: http://code.google.com/p/cssutils/source/browse/trunk/examples/style.py?r=917 .. _Deliverance: http://www.openplans.org/projects/deliverance/project-home @@ -138,6 +142,7 @@ .. _OpenXMLlib: http://permalink.gmane.org/gmane.comp.python.lxml.devel/3250 .. _Pycoon: http://pypi.python.org/pypi/pycoon .. _rfadict: http://pypi.python.org/pypi/rdfadict +.. _z3c.rml: http://pypi.python.org/pypi/z3c.rml .. 
_happy: http://thread.gmane.org/gmane.comp.python.lxml.devel/3244/focus=3244 .. _users: http://article.gmane.org/gmane.comp.python.lxml.devel/3246 From scoder at codespeak.net Fri Feb 1 12:34:18 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 1 Feb 2008 12:34:18 +0100 (CET) Subject: [Lxml-checkins] r51169 - lxml/trunk Message-ID: <20080201113418.923DD1684CC@codespeak.net> Author: scoder Date: Fri Feb 1 12:34:17 2008 New Revision: 51169 Modified: lxml/trunk/ (props changed) lxml/trunk/Makefile Log: r3374 at delle: sbehnel | 2008-02-01 10:16:04 +0100 fixed benchmark run from Makefile Modified: lxml/trunk/Makefile ============================================================================== --- lxml/trunk/Makefile (original) +++ lxml/trunk/Makefile Fri Feb 1 12:34:17 2008 @@ -25,7 +25,10 @@ $(PYTHON) test.py bench_inplace: inplace - $(PYTHON) bench.py -i + $(PYTHON) benchmark/bench_etree.py -i + $(PYTHON) benchmark/bench_xpath.py -i + $(PYTHON) benchmark/bench_xslt.py -i + $(PYTHON) benchmark/bench_objectify.py -i ftest_build: build $(PYTHON) test.py -f $(TESTFLAGS) $(TESTOPTS) From scoder at codespeak.net Fri Feb 1 12:34:22 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 1 Feb 2008 12:34:22 +0100 (CET) Subject: [Lxml-checkins] r51170 - in lxml/trunk: . 
doc Message-ID: <20080201113422.C96BA1684CF@codespeak.net> Author: scoder Date: Fri Feb 1 12:34:22 2008 New Revision: 51170 Modified: lxml/trunk/ (props changed) lxml/trunk/doc/tutorial.txt Log: r3375 at delle: sbehnel | 2008-02-01 11:40:10 +0100 tutorial update: mention parsing from URLs Modified: lxml/trunk/doc/tutorial.txt ============================================================================== --- lxml/trunk/doc/tutorial.txt (original) +++ lxml/trunk/doc/tutorial.txt Fri Feb 1 12:34:22 2008 @@ -546,11 +546,12 @@ Parsing from strings and files ============================== -``lxml.etree`` supports parsing XML in a number of ways and from all important -sources, namely strings, files and file-like objects. The main parse -functions are ``fromstring()`` and ``parse()``, both called with the source as -first argument. By default, they use the standard parser, but you can always -pass a different parser as second argument. +``lxml.etree`` supports parsing XML in a number of ways and from all +important sources, namely strings, files, URLs (http/ftp) and +file-like objects. The main parse functions are ``fromstring()`` and +``parse()``, both called with the source as first argument. By +default, they use the standard parser, but you can always pass a +different parser as second argument. The fromstring() function @@ -601,6 +602,20 @@ >>> print etree.tostring(root) data +The ``parse()`` function supports any of the following sources: + +* an open file object + +* a file-like object that has a ``.read(byte_count)`` method returning + a byte string on each call + +* a filename string + +* an HTTP or FTP URL string + +Note that passing a filename or URL is usually faster than passing an +open file. + Parser objects -------------- From scoder at codespeak.net Fri Feb 1 12:34:26 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 1 Feb 2008 12:34:26 +0100 (CET) Subject: [Lxml-checkins] r51171 - in lxml/trunk: . 
doc Message-ID: <20080201113426.E21A31684D6@codespeak.net> Author: scoder Date: Fri Feb 1 12:34:26 2008 New Revision: 51171 Modified: lxml/trunk/ (props changed) lxml/trunk/doc/FAQ.txt Log: r3376 at delle: sbehnel | 2008-02-01 11:40:55 +0100 FAQ update: Cython is *almost* Python Modified: lxml/trunk/doc/FAQ.txt ============================================================================== --- lxml/trunk/doc/FAQ.txt (original) +++ lxml/trunk/doc/FAQ.txt Fri Feb 1 12:34:26 2008 @@ -320,19 +320,23 @@ Why is lxml not written in Python? ---------------------------------- -lxml interfaces with two C libraries: libxml2 and libxslt. Accessing them at -the C-level is required for performance reasons. +It *almost* is. -To avoid writing plain C-code and caring too much about the details of -built-in types and reference counting, lxml is written in Cython_, a -Python-like language that is translated into C-code. Chances are that if you -know Python, you can write `code that Cython accepts`_. Again, the C-ish style -used in the lxml code is just for performance optimisations. If you want to -contribute, don't bother with the details, a Python implementation of your -contribution is better than none. And keep in mind that lxml's flexible API -often favours an implementation of features in pure Python, without bothering -with C-code at all. For example, the ``lxml.html`` package is entirely written -in Python. +lxml is not written in plain Python, because it interfaces with two C +libraries: libxml2 and libxslt. Accessing them at the C-level is +required for performance reasons. + +However, to avoid writing plain C-code and caring too much about the +details of built-in types and reference counting, lxml is written in +Cython_, a Python-like language that is translated into C-code. +Chances are that if you know Python, you can write `code that Cython +accepts`_. Again, the C-ish style used in the lxml code is just for +performance optimisations. 
If you want to contribute, don't bother +with the details, a Python implementation of your contribution is +better than none. And keep in mind that lxml's flexible API often +favours an implementation of features in pure Python, without +bothering with C-code at all. For example, the ``lxml.html`` package +is entirely written in Python. Please contact the `mailing list`_ if you need any help. From scoder at codespeak.net Fri Feb 1 12:34:31 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 1 Feb 2008 12:34:31 +0100 (CET) Subject: [Lxml-checkins] r51172 - in lxml/trunk: . doc doc/html Message-ID: <20080201113431.D05C41684CB@codespeak.net> Author: scoder Date: Fri Feb 1 12:34:30 2008 New Revision: 51172 Modified: lxml/trunk/ (props changed) lxml/trunk/doc/html/style.css lxml/trunk/doc/main.txt Log: r3377 at delle: sbehnel | 2008-02-01 12:33:19 +0100 added page quote to project page Modified: lxml/trunk/doc/html/style.css ============================================================================== --- lxml/trunk/doc/html/style.css (original) +++ lxml/trunk/doc/html/style.css Fri Feb 1 12:34:30 2008 @@ -205,12 +205,31 @@ font-style: italic; } -div.line-block { +div.eyecatcher { font-family: Times, "Times New Roman", serif; text-align: center; font-size: 140%; } +div.pagequote { + position: absolute; + top: 0px; + right: 0px; + padding: 10px 10px 0 0; + text-align: right; + font-size: 80%; + color: #990000; +} + +div.pagequote .reference { + font-size: 140%; +} + +html > .pagequote { + /* ignored by IE -> everyone else knows 'fixed', right? 
*/ + position: fixed; +} + code { color: Black; background-color: #cccccc; Modified: lxml/trunk/doc/main.txt ============================================================================== --- lxml/trunk/doc/main.txt (original) +++ lxml/trunk/doc/main.txt Fri Feb 1 12:34:30 2008 @@ -5,11 +5,18 @@ :description: lxml - the most feature-rich and easy-to-use library for working with XML and HTML in the Python language :keywords: lxml, etree, objectify, Python, XML, HTML +.. class:: eyecatcher + | lxml is the most feature-rich | and easy-to-use library | for working with XML and HTML | in the Python language. +.. class:: pagequote + +| `? lxml takes all the pain out of XML. ? `_ +| Stephan Richter + .. 1 Introduction 2 Documentation From scoder at codespeak.net Fri Feb 1 12:34:35 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 1 Feb 2008 12:34:35 +0100 (CET) Subject: [Lxml-checkins] r51173 - in lxml/trunk: . doc Message-ID: <20080201113435.182AE1684CC@codespeak.net> Author: scoder Date: Fri Feb 1 12:34:34 2008 New Revision: 51173 Modified: lxml/trunk/ (props changed) lxml/trunk/doc/tutorial.txt Log: r3378 at delle: sbehnel | 2008-02-01 12:33:42 +0100 explain different return values of fromstring() and parse() Modified: lxml/trunk/doc/tutorial.txt ============================================================================== --- lxml/trunk/doc/tutorial.txt (original) +++ lxml/trunk/doc/tutorial.txt Fri Feb 1 12:34:34 2008 @@ -602,6 +602,10 @@ >>> print etree.tostring(root) data +The reasoning behind this difference is that ``parse()`` returns a +complete document from a file, while the string parsing functions are +commonly used to parse XML fragments. + The ``parse()`` function supports any of the following sources: * an open file object From scoder at codespeak.net Fri Feb 1 14:00:45 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 1 Feb 2008 14:00:45 +0100 (CET) Subject: [Lxml-checkins] r51176 - in lxml/trunk: . 
doc Message-ID: <20080201130045.8DBD1168471@codespeak.net> Author: scoder Date: Fri Feb 1 14:00:42 2008 New Revision: 51176 Modified: lxml/trunk/ (props changed) lxml/trunk/doc/validation.txt Log: r3387 at delle: sbehnel | 2008-02-01 13:58:46 +0100 skip catalog test Modified: lxml/trunk/doc/validation.txt ============================================================================== --- lxml/trunk/doc/validation.txt (original) +++ lxml/trunk/doc/validation.txt Fri Feb 1 14:00:42 2008 @@ -71,9 +71,9 @@ If the validation fails (be it for a DTD or an XML schema), the parser will raise an exception:: - >>> root = etree.fromstring("<a>not int</a>", parser) + >>> root = etree.fromstring("<a>no int</a>", parser) Traceback (most recent call last): - XMLSyntaxError: Element 'a': 'not int' is not a valid value of the atomic type 'xs:integer'. + XMLSyntaxError: Element 'a': 'no int' is not a valid value of the atomic type 'xs:integer'. If you want the parser to succeed regardless of the outcome of the validation, you should use a non validating parser and run the @@ -111,15 +111,11 @@ :1:0:ERROR:VALID:DTD_NOT_EMPTY: Element b was declared EMPTY this one has content As an alternative to parsing from a file, you can use the -``external_id`` keyword argument to parse from a catalog:: +``external_id`` keyword argument to parse from a catalog. The +following example reads the DocBook DTD in version 4.2, if available +in the system catalog:: - >>> docbook = "-//OASIS//DTD DocBook XML V4.2//EN" - >>> dtd = etree.DTD(external_id = docbook) # requires catalog support - - >>> root = etree.XML("
") - >>> dtd.assertValid(root) # doctest: +ELLIPSIS - Traceback (most recent call last): - DocumentInvalid: Element article content does not follow the DTD, ... + dtd = etree.DTD(external_id = "-//OASIS//DTD DocBook XML V4.2//EN") RelaxNG From scoder at codespeak.net Fri Feb 1 14:54:28 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 1 Feb 2008 14:54:28 +0100 (CET) Subject: [Lxml-checkins] r51180 - in lxml/trunk: . src/lxml Message-ID: <20080201135428.5D11B1684C2@codespeak.net> Author: scoder Date: Fri Feb 1 14:54:27 2008 New Revision: 51180 Modified: lxml/trunk/ (props changed) lxml/trunk/src/lxml/dtd.pxi Log: r3390 at delle: sbehnel | 2008-02-01 14:52:59 +0100 handle unicode URLs in DTD(), just in case Modified: lxml/trunk/src/lxml/dtd.pxi ============================================================================== --- lxml/trunk/src/lxml/dtd.pxi (original) +++ lxml/trunk/src/lxml/dtd.pxi Fri Feb 1 14:54:27 2008 @@ -31,7 +31,8 @@ self._c_dtd = NULL _Validator.__init__(self) if file is not None: - if python._isString(file): + if _isString(file): + file = _encodeFilename(file) self._error_log.connect() self._c_dtd = xmlparser.xmlParseDTD(NULL, _cstr(file)) self._error_log.disconnect() From scoder at codespeak.net Fri Feb 1 14:54:33 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 1 Feb 2008 14:54:33 +0100 (CET) Subject: [Lxml-checkins] r51181 - in lxml/trunk: . 
doc Message-ID: <20080201135433.64C791684C9@codespeak.net> Author: scoder Date: Fri Feb 1 14:54:32 2008 New Revision: 51181 Modified: lxml/trunk/ (props changed) lxml/trunk/CHANGES.txt lxml/trunk/doc/main.txt lxml/trunk/version.txt Log: r3391 at delle: sbehnel | 2008-02-01 14:53:55 +0100 prepare release of 2.0 Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Fri Feb 1 14:54:32 2008 @@ -2,8 +2,8 @@ lxml changelog ============== -Under development -================= +2.0 (2008-02-01) +================ Features added -------------- Modified: lxml/trunk/doc/main.txt ============================================================================== --- lxml/trunk/doc/main.txt (original) +++ lxml/trunk/doc/main.txt Fri Feb 1 14:54:32 2008 @@ -5,6 +5,11 @@ :description: lxml - the most feature-rich and easy-to-use library for working with XML and HTML in the Python language :keywords: lxml, etree, objectify, Python, XML, HTML +.. class:: pagequote + +| `? lxml takes all the pain out of XML. ? `_ +| Stephan Richter + .. class:: eyecatcher | lxml is the most feature-rich @@ -12,11 +17,6 @@ | for working with XML and HTML | in the Python language. -.. class:: pagequote - -| `? lxml takes all the pain out of XML. ? `_ -| Stephan Richter - .. 1 Introduction 2 Documentation @@ -44,10 +44,6 @@ .. _FAQ: FAQ.html -**This page describes the current in-development version of lxml that will -become lxml 2.0.** - - Documentation ------------- @@ -149,8 +145,8 @@ .. _`lxml at the Python Package Index`: http://pypi.python.org/pypi/lxml/ .. _`this key`: pubkey.asc -The latest version is `lxml 2.0beta2`_, released 2008-01-26 -(`changes for 2.0beta2`_). `Older versions`_ are listed below. +The latest version is `lxml 2.0`_, released 2008-02-01 +(`changes for 2.0`_). `Older versions`_ are listed below. .. 
_`Older versions`: #old-versions @@ -210,6 +206,8 @@ Old Versions ------------ +* `lxml 2.0beta2`_, released 2008-01-26 (`changes for 2.0beta2`_) + * `lxml 2.0beta1`_, released 2008-01-11 (`changes for 2.0beta1`_) * `lxml 2.0alpha6`_, released 2007-12-19 (`changes for 2.0alpha6`_) @@ -274,6 +272,7 @@ * `lxml 0.5`_, released 2005-04-08 +.. _`lxml 2.0`: lxml-2.0.tgz .. _`lxml 2.0beta2`: lxml-2.0beta2.tgz .. _`lxml 2.0beta1`: lxml-2.0beta1.tgz .. _`lxml 2.0alpha6`: lxml-2.0alpha6.tgz @@ -307,6 +306,7 @@ .. _`lxml 0.5.1`: lxml-0.5.1.tgz .. _`lxml 0.5`: lxml-0.5.tgz +.. _`changes for 2.0`: changes-2.0.html .. _`changes for 2.0beta2`: changes-2.0beta2.html .. _`changes for 2.0beta1`: changes-2.0beta1.html .. _`changes for 2.0alpha6`: changes-2.0alpha6.html Modified: lxml/trunk/version.txt ============================================================================== --- lxml/trunk/version.txt (original) +++ lxml/trunk/version.txt Fri Feb 1 14:54:32 2008 @@ -1 +1 @@ -2.0beta2 +2.0 From scoder at codespeak.net Fri Feb 1 16:26:10 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 1 Feb 2008 16:26:10 +0100 (CET) Subject: [Lxml-checkins] r51188 - in lxml/trunk: . 
doc src/lxml src/lxml/tests Message-ID: <20080201152610.1CF221684C2@codespeak.net> Author: scoder Date: Fri Feb 1 16:26:09 2008 New Revision: 51188 Modified: lxml/trunk/ (props changed) lxml/trunk/CHANGES.txt lxml/trunk/doc/lxml2.txt lxml/trunk/doc/parsing.txt lxml/trunk/doc/tutorial.txt lxml/trunk/src/lxml/lxml.etree.pyx lxml/trunk/src/lxml/serializer.pxi lxml/trunk/src/lxml/tests/test_etree.py Log: r3394 at delle: sbehnel | 2008-02-01 15:57:58 +0100 deprecate et.tounicode() in favour of et.tostring(encoding=unicode) Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Fri Feb 1 16:26:09 2008 @@ -8,6 +8,10 @@ Features added -------------- +* Passing the ``unicode`` type as ``encoding`` to ``tostring()`` will + serialise to unicode. The ``tounicode()`` function is now + deprecated. + * ``XMLSchema()`` and ``RelaxNG()`` can parse from StringIO. * ``makeparser()`` function in ``lxml.objectify`` to create a new Modified: lxml/trunk/doc/lxml2.txt ============================================================================== --- lxml/trunk/doc/lxml2.txt (original) +++ lxml/trunk/doc/lxml2.txt Fri Feb 1 16:26:09 2008 @@ -7,7 +7,7 @@ 1 Changes in etree and objectify 1.1 Incompatible changes 1.2 Enhancements - 1.3 Other changes + 1.3 Deprecated features 2 New modules 2.1 lxml.html 2.2 lxml.cssselect @@ -136,8 +136,23 @@ path expression, e.g. by the ``string()`` function or extension functions, will return None as their parent. +* Parse time XML Schema validation is now supported by passing an + XMLSchema object to the ``schema`` keyword argument of a parser. + +* The parsers support a ``target`` object that implements + ElementTree's `TreeBuilder interface`_. + .. _`E factory`: objectify.html#tree-generation-with-the-e-factory .. _`find the Element that carries it`: tutorial.html#using-xpath-to-find-text +.. 
_`TreeBuilder interface`: http://effbot.org/elementtree/elementtree-treebuilder.htm + + +Deprecated features +------------------- + +The following features were deprecated and will be removed in lxml 2.1: + +* The ``tounicode()`` function was replaced by ``tostring(encoding=unicode)``. New modules Modified: lxml/trunk/doc/parsing.txt ============================================================================== --- lxml/trunk/doc/parsing.txt (original) +++ lxml/trunk/doc/parsing.txt Fri Feb 1 16:26:09 2008 @@ -488,9 +488,9 @@ Serialising to Unicode strings ------------------------------ -To serialize the result, you would normally use the ``tostring`` module -function, which serializes to plain ASCII by default or a number of other -encodings if asked for:: +To serialize the result, you would normally use the ``tostring()`` +module function, which serializes to plain ASCII by default or a +number of other byte encodings if asked for:: >>> etree.tostring(root) '  +  ' @@ -498,33 +498,36 @@ >>> etree.tostring(root, encoding='UTF-8', xml_declaration=False) ' \xef\xa3\x91 + \xef\xa3\x92 ' -As an extension, lxml.etree has a new ``tounicode()`` function that you can -call on XML tree objects to retrieve a Python unicode representation:: +As an extension, lxml.etree recognises the unicode type as encoding to +build a Python unicode representation of a tree:: - >>> etree.tounicode(root) + >>> etree.tostring(root, encoding=unicode) u' \uf8d1 + \uf8d2 ' >>> el = etree.Element("test") - >>> etree.tounicode(el) + >>> etree.tostring(el, encoding=unicode) u'' >>> subel = etree.SubElement(el, "subtest") - >>> etree.tounicode(el) + >>> etree.tostring(el, encoding=unicode) u'' >>> tree = etree.ElementTree(el) - >>> etree.tounicode(tree) + >>> etree.tostring(tree, encoding=unicode) u'' -The result of ``tounicode()`` can be treated like any other Python unicode -string and then passed back into the parsers. 
However, if you want to save -the result to a file or pass it over the network, you should use ``write()`` -or ``tostring()`` with an encoding argument (typically UTF-8) to serialize the -XML. The main reason is that unicode strings returned by ``tounicode()`` -never have an XML declaration and therefore do not specify their encoding. -These strings are most likely not parsable by other XML libraries. - -In contrast, the ``tostring()`` function automatically adds a declaration as -needed that reflects the encoding of the returned string. This makes it -possible for other parsers to correctly parse the XML byte stream. Note that -using ``tostring()`` with UTF-8 is also considerably faster in most cases. +The result of ``tostring(encoding=unicode)`` can be treated like any +other Python unicode string and then passed back into the parsers. +However, if you want to save the result to a file or pass it over the +network, you should use ``write()`` or ``tostring()`` with a byte +encoding (typically UTF-8) to serialize the XML. The main reason is +that unicode strings returned by ``tostring(encoding=unicode)`` are +not byte streams and they never have an XML declaration to specify +their encoding. These strings are most likely not parsable by other +XML libraries. + +For normal byte encodings, the ``tostring()`` function automatically +adds a declaration as needed that reflects the encoding of the +returned string. This makes it possible for other parsers to +correctly parse the XML byte stream. Note that using ``tostring()`` +with UTF-8 is also considerably faster in most cases. 
Modified: lxml/trunk/doc/tutorial.txt ============================================================================== --- lxml/trunk/doc/tutorial.txt (original) +++ lxml/trunk/doc/tutorial.txt Fri Feb 1 16:26:09 2008 @@ -486,9 +486,10 @@ >>> print etree.tostring(root, method='text') HelloWorld -For the plain text output, the ``tounicode()`` function might become handy:: +For the plain text output, serialising to a Python unicode string +might become handy. Just pass the ``unicode`` type as encoding:: - >>> etree.tounicode(root, method='text') + >>> etree.tostring(root, encoding=unicode, method='text') u'HelloWorld' Modified: lxml/trunk/src/lxml/lxml.etree.pyx ============================================================================== --- lxml/trunk/src/lxml/lxml.etree.pyx (original) +++ lxml/trunk/src/lxml/lxml.etree.pyx Fri Feb 1 16:26:09 2008 @@ -13,6 +13,9 @@ except AttributeError: from sets import Set as set +cdef object _unicode +_unicode = __builtin__.unicode + del __builtin__ cdef object os_path_join @@ -2253,6 +2256,9 @@ 'xml_declaration' (bool). Note that changing the encoding to a non UTF-8 compatible encoding will enable a declaration by default. + You can also serialise to a Unicode string without declaration by + passing the ``unicode`` function as encoding. + The keyword argument 'pretty_print' (bool) enables formatted XML. The keyword argument 'method' selects the output method: 'xml', @@ -2263,7 +2269,12 @@ on the tail text of children, which will always be serialised. 
""" cdef bint write_declaration - if xml_declaration is None: + if encoding is _unicode: + if xml_declaration: + raise ValueError( + "Serialisation to unicode must not request an XML declaration") + write_declaration = 0 + elif xml_declaration is None: # by default, write an XML declaration only for non-standard encodings write_declaration = encoding is not None and encoding.upper() not in \ ('ASCII', 'UTF-8', 'UTF8', 'US-ASCII') @@ -2309,6 +2320,8 @@ You can prevent the tail text of the element from being serialised by passing the boolean ``with_tail`` option. This has no impact on the tail text of children, which will always be serialised. + + @deprecated: use ``tostring(el, encoding=unicode)`` instead. """ if isinstance(element_or_tree, _Element): return _tounicode(<_Element>element_or_tree, method, 0, pretty_print, Modified: lxml/trunk/src/lxml/serializer.pxi ============================================================================== --- lxml/trunk/src/lxml/serializer.pxi (original) +++ lxml/trunk/src/lxml/serializer.pxi Fri Feb 1 16:26:09 2008 @@ -57,6 +57,9 @@ return None if encoding is None: c_enc = NULL + elif encoding is _unicode: + return _tounicode(element, method, write_complete_document, + pretty_print, with_tail) else: encoding = _utf8(encoding) c_enc = _cstr(encoding) Modified: lxml/trunk/src/lxml/tests/test_etree.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_etree.py (original) +++ lxml/trunk/src/lxml/tests/test_etree.py Fri Feb 1 16:26:09 2008 @@ -2021,6 +2021,75 @@ result = tounicode(a, pretty_print=True) self.assertEquals(result, "\n \n \n\n") + def test_tostring_unicode(self): + tostring = self.etree.tostring + Element = self.etree.Element + SubElement = self.etree.SubElement + + a = Element('a') + b = SubElement(a, 'b') + c = SubElement(a, 'c') + + self.assert_(isinstance(tostring(a, encoding=unicode), unicode)) + self.assertEquals('', + canonicalize(tostring(a, 
encoding=unicode))) + + def test_tostring_unicode_element(self): + tostring = self.etree.tostring + Element = self.etree.Element + SubElement = self.etree.SubElement + + a = Element('a') + b = SubElement(a, 'b') + c = SubElement(a, 'c') + d = SubElement(c, 'd') + self.assert_(isinstance(tostring(b, encoding=unicode), unicode)) + self.assert_(isinstance(tostring(c, encoding=unicode), unicode)) + self.assertEquals('<b></b>', + canonicalize(tostring(b, encoding=unicode))) + self.assertEquals('<c><d></d></c>', + canonicalize(tostring(c, encoding=unicode))) + + def test_tostring_unicode_none(self): + tostring = self.etree.tostring + self.assertRaises(TypeError, self.etree.tostring, + None, encoding=unicode) + + def test_tostring_unicode_element_tail(self): + tostring = self.etree.tostring + Element = self.etree.Element + SubElement = self.etree.SubElement + + a = Element('a') + b = SubElement(a, 'b') + c = SubElement(a, 'c') + d = SubElement(c, 'd') + b.tail = 'Foo' + + self.assert_(isinstance(tostring(b, encoding=unicode), unicode)) + self.assert_(tostring(b, encoding=unicode) == '<b/>Foo' or + tostring(b, encoding=unicode) == '<b />Foo') + + def test_tostring_unicode_pretty(self): + tostring = self.etree.tostring + Element = self.etree.Element + SubElement = self.etree.SubElement + + a = Element('a') + b = SubElement(a, 'b') + c = SubElement(a, 'c') + + result = tostring(a, encoding=unicode) + self.assertEquals(result, "<a><b/><c/></a>") + + result = tostring(a, encoding=unicode, pretty_print=False) + self.assertEquals(result, "<a><b/><c/></a>") + + result = tostring(a, encoding=unicode, pretty_print=True) + self.assertEquals(result, "<a>\n  <b/>\n  <c/>\n</a>\n") + + # helper methods + def _writeElement(self, element, encoding='us-ascii'): """Write out element for comparison. """ From scoder at codespeak.net Fri Feb 1 16:26:16 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 1 Feb 2008 16:26:16 +0100 (CET) Subject: [Lxml-checkins] r51189 - in lxml/trunk: . 
doc src/lxml Message-ID: <20080201152616.755571684CC@codespeak.net> Author: scoder Date: Fri Feb 1 16:26:15 2008 New Revision: 51189 Modified: lxml/trunk/ (props changed) lxml/trunk/doc/lxml2.txt lxml/trunk/src/lxml/xmlerror.pxi Log: r3395 at delle: sbehnel | 2008-02-01 16:25:37 +0100 cleanup in deprecated functions, say what will be removed in lxml 2.1 Modified: lxml/trunk/doc/lxml2.txt ============================================================================== --- lxml/trunk/doc/lxml2.txt (original) +++ lxml/trunk/doc/lxml2.txt Fri Feb 1 16:26:15 2008 @@ -7,11 +7,11 @@ 1 Changes in etree and objectify 1.1 Incompatible changes 1.2 Enhancements - 1.3 Deprecated features + 1.3 Deprecation 2 New modules - 2.1 lxml.html - 2.2 lxml.cssselect - 2.3 lxml.doctestcompare + 2.1 lxml.usedoctest + 2.2 lxml.html + 2.3 lxml.cssselect During the development of the lxml 1.x series, a couple of quirks were @@ -147,12 +147,32 @@ .. _`TreeBuilder interface`: http://effbot.org/elementtree/elementtree-treebuilder.htm -Deprecated features -------------------- +Deprecation +----------- -The following features were deprecated and will be removed in lxml 2.1: +The following functions and methods were deprecated and will be +removed in lxml 2.1: -* The ``tounicode()`` function was replaced by ``tostring(encoding=unicode)``. +* The ``tounicode()`` function was replaced by the call + ``tostring(encoding=unicode)``. + +* CamelCaseNamed module functions were renamed to their underscore + equivalents to follow `PEP 8`_ in naming. 
+ + - ``etree.setDefaultParser()`` -> ``etree.set_default_parser()`` + + - ``etree.getDefaultParser()`` -> ``etree.get_default_parser()`` + + - ``etree.useGlobalPythonLog()`` -> ``etree.use_global_python_log()`` + + - ``XMLParser.setElementClassLookup()`` -> ``.set_element_class_lookup()`` + + - ``HTMLParser.setElementClassLookup()`` -> ``.set_element_class_lookup()`` + +* The ``.getiterator()`` method on Elements and ElementTrees was + renamed to ``.iter()`` to follow ElementTree 1.3. + +.. _`PEP 8`: http://www.python.org/dev/peps/pep-0008/ New modules Modified: lxml/trunk/src/lxml/xmlerror.pxi ============================================================================== --- lxml/trunk/src/lxml/xmlerror.pxi (original) +++ lxml/trunk/src/lxml/xmlerror.pxi Fri Feb 1 16:26:15 2008 @@ -4,9 +4,17 @@ # module level API functions +def clear_error_log(): + """Clear the global error log. Note that this log is already bound to a + fixed size. + """ + __GLOBAL_ERROR_LOG.clear() + def clearErrorLog(): """Clear the global error log. Note that this log is already bound to a fixed size. + + @deprecated: use ``clear_error_log()`` instead. """ __GLOBAL_ERROR_LOG.clear() @@ -386,6 +394,17 @@ Note that this disables access to the global error log from exceptions. Parsers, XSLT etc. will continue to provide their normal local error log. + + @deprecated: use ``use_global_python_log()`` instead. + """ + use_global_python_log(log) + +def use_global_python_log(PyErrorLog log not None): + """Replace the global error log by an etree.PyErrorLog that uses the + standard Python logging package. + + Note that this disables access to the global error log from exceptions. + Parsers, XSLT etc. will continue to provide their normal local error log. 
""" global __GLOBAL_ERROR_LOG __GLOBAL_ERROR_LOG = log From scoder at codespeak.net Fri Feb 1 18:25:30 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 1 Feb 2008 18:25:30 +0100 (CET) Subject: [Lxml-checkins] r51190 - in lxml/trunk: . doc Message-ID: <20080201172530.6661C1684FE@codespeak.net> Author: scoder Date: Fri Feb 1 18:25:28 2008 New Revision: 51190 Modified: lxml/trunk/ (props changed) lxml/trunk/doc/lxml2.txt Log: r3398 at delle: sbehnel | 2008-02-01 18:24:58 +0100 more on: what's new Modified: lxml/trunk/doc/lxml2.txt ============================================================================== --- lxml/trunk/doc/lxml2.txt (original) +++ lxml/trunk/doc/lxml2.txt Fri Feb 1 18:25:28 2008 @@ -70,6 +70,13 @@ with cElementTree, which also enforces keyword-only arguments in a couple of places. +* XML tag names are validated when creating an Element. This does not + apply to HTML tags, where only HTML special characters are + forbidden. The distinction is made by the ``SubElement()`` factory, + which tests if the tree it works on is an HTML tree, and by the + ``.makeelement()`` methods of parsers, which behave differently for + the ``XMLParser()`` and the ``HTMLParser()``. + * XPath now raises exceptions specific to the part of the execution that failed: ``XPathSyntaxError`` for parser errors and ``XPathEvalError`` for errors that occurred during the evaluation. Note that the distinction only @@ -136,11 +143,25 @@ path expression, e.g. by the ``string()`` function or extension functions, will return None as their parent. -* Parse time XML Schema validation is now supported by passing an +* Setting a ``QName`` object as value of the ``.text`` property or as + an attribute value will resolve its prefix in the respective context + +* Following ElementTree 1.3, the ``iterfind()`` method supports + efficient iteration based on XPath-like expressions. 
+ +The parsers also received some major enhancements: + +* ``iterparse()`` can parse HTML when passing the boolean ``html`` + keyword. + +* Parse time XML Schema validation by passing an XMLSchema object to the ``schema`` keyword argument of a parser. -* The parsers support a ``target`` object that implements - ElementTree's `TreeBuilder interface`_. +* Support for a ``target`` object that implements ElementTree's + `TreeBuilder interface`_. + +* The ``encoding`` keyword allows overriding the document encoding. + .. _`E factory`: objectify.html#tree-generation-with-the-e-factory .. _`find the Element that carries it`: tutorial.html#using-xpath-to-find-text @@ -150,14 +171,14 @@ Deprecation ----------- -The following functions and methods were deprecated and will be -removed in lxml 2.1: +The following functions and methods are now deprecated. They are +still available in lxml 2.0 and will be removed in lxml 2.1: * The ``tounicode()`` function was replaced by the call ``tostring(encoding=unicode)``. -* CamelCaseNamed module functions were renamed to their underscore - equivalents to follow `PEP 8`_ in naming. +* CamelCaseNamed module functions and methods were renamed to their + underscore equivalents to follow `PEP 8`_ in naming. 
- ``etree.setDefaultParser()`` -> ``etree.set_default_parser()`` From scoder at codespeak.net Fri Feb 1 19:01:30 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 1 Feb 2008 19:01:30 +0100 (CET) Subject: [Lxml-checkins] r51191 - lxml/trunk Message-ID: <20080201180130.7EB7E1684ED@codespeak.net> Author: scoder Date: Fri Feb 1 19:01:29 2008 New Revision: 51191 Modified: lxml/trunk/ (props changed) lxml/trunk/MANIFEST.in Log: r3400 at delle: sbehnel | 2008-02-01 19:01:03 +0100 removed reference to non-existing file Modified: lxml/trunk/MANIFEST.in ============================================================================== --- lxml/trunk/MANIFEST.in (original) +++ lxml/trunk/MANIFEST.in Fri Feb 1 19:01:29 2008 @@ -12,4 +12,4 @@ recursive-include doc *.txt *.html *.css *.xml *.mgp pubkey.asc tagpython.png recursive-include fake_pyrex *.py include doc/mkhtml.py doc/rest2html.py -exclude doc/pyrex.txt src/lxml/etree.pxi +exclude doc/pyrex.txt From scoder at codespeak.net Fri Feb 1 19:03:58 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 1 Feb 2008 19:03:58 +0100 (CET) Subject: [Lxml-checkins] r51192 - lxml/tag/lxml-2.0 Message-ID: <20080201180358.7564E1684EF@codespeak.net> Author: scoder Date: Fri Feb 1 19:03:57 2008 New Revision: 51192 Added: lxml/tag/lxml-2.0/ - copied from r51191, lxml/trunk/ Log: tag for lxml 2.0 From scoder at codespeak.net Fri Feb 1 19:33:33 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 1 Feb 2008 19:33:33 +0100 (CET) Subject: [Lxml-checkins] r51193 - lxml/branch/lxml-2.0 Message-ID: <20080201183333.635571684DB@codespeak.net> Author: scoder Date: Fri Feb 1 19:33:32 2008 New Revision: 51193 Added: lxml/branch/lxml-2.0/ - copied from r51192, lxml/tag/lxml-2.0/ Log: new branch for lxml 2.0 series From scoder at codespeak.net Sun Feb 3 21:00:57 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sun, 3 Feb 2008 21:00:57 +0100 (CET) Subject: [Lxml-checkins] r51232 - 
in lxml/trunk: . doc Message-ID: <20080203200057.52C011683D1@codespeak.net> Author: scoder Date: Sun Feb 3 21:00:56 2008 New Revision: 51232 Modified: lxml/trunk/ (props changed) lxml/trunk/doc/build.txt Log: r3405 at delle: sbehnel | 2008-02-02 16:32:10 +0100 doc clarification Modified: lxml/trunk/doc/build.txt ============================================================================== --- lxml/trunk/doc/build.txt (original) +++ lxml/trunk/doc/build.txt Sun Feb 3 21:00:56 2008 @@ -165,10 +165,11 @@ an older version. The result can be segfaults on this platform that are hard to track down. -To make sure the newer libxml2 and libxslt versions are used (e.g. under -fink), you should add the directory where you installed the libraries to the -``DYLD_LIBRARY_PATH`` environment variable. This seems to fix a lot of -problems for users. +To make sure the newer libxml2 and libxslt versions are used +(e.g. those provided by fink or macports), you should add the +directory where you installed the libraries to the +``DYLD_LIBRARY_PATH`` environment variable *at runtime* (i.e. not just +at build time). This seems to fix a lot of problems for users. Alternatively, you can build lxml statically. A way to do this on MS Windows is described in the next section, but it should be easy to adapt it for From lxml-checkins at codespeak.net Sun Feb 3 21:31:52 2008 From: lxml-checkins at codespeak.net (lxml-checkins at codespeak.net) Date: Sun, 3 Feb 2008 21:31:52 +0100 (CET) Subject: [Lxml-checkins] February 50% OFF Message-ID: <20080203143126.4928.qmail@ppp85-140-54-38.pppoe.mtu-net.ru> An HTML attachment was scrubbed... 
URL: http://codespeak.net/pipermail/lxml-checkins/attachments/20080203/29cc36dd/attachment.htm From lxml-checkins at codespeak.net Mon Feb 4 10:18:21 2008 From: lxml-checkins at codespeak.net (lxml-checkins at codespeak.net) Date: Mon, 4 Feb 2008 10:18:21 +0100 (CET) Subject: [Lxml-checkins] January 72% OFF Message-ID: <20080204131810.8322.qmail@dsl88.241-13866.ttnet.net.tr> An HTML attachment was scrubbed... URL: http://codespeak.net/pipermail/lxml-checkins/attachments/20080204/f33aed8b/attachment.htm From scoder at codespeak.net Mon Feb 4 19:35:14 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Mon, 4 Feb 2008 19:35:14 +0100 (CET) Subject: [Lxml-checkins] r51261 - in lxml/trunk: . src/lxml/tests Message-ID: <20080204183514.441C91684F0@codespeak.net> Author: scoder Date: Mon Feb 4 19:35:12 2008 New Revision: 51261 Modified: lxml/trunk/ (props changed) lxml/trunk/src/lxml/tests/test_elementtree.py Log: r3407 at delle: sbehnel | 2008-02-04 19:33:49 +0100 disable cET tests for cET for te current version Modified: lxml/trunk/src/lxml/tests/test_elementtree.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_elementtree.py (original) +++ lxml/trunk/src/lxml/tests/test_elementtree.py Mon Feb 4 19:35:12 2008 @@ -16,7 +16,7 @@ if cElementTree is not None: if tuple([int(n) for n in - getattr(cElementTree, "VERSION", "0.0").split(".")]) <= (1,0,6): + getattr(cElementTree, "VERSION", "0.0").split(".")]) <= (1,0,7): cElementTree = None try: From scoder at codespeak.net Mon Feb 4 19:35:17 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Mon, 4 Feb 2008 19:35:17 +0100 (CET) Subject: [Lxml-checkins] r51262 - in lxml/trunk: . 
doc Message-ID: <20080204183517.BB19F168507@codespeak.net> Author: scoder Date: Mon Feb 4 19:35:16 2008 New Revision: 51262 Modified: lxml/trunk/ (props changed) lxml/trunk/doc/build.txt Log: r3408 at delle: sbehnel | 2008-02-04 19:34:29 +0100 doc clarification: Cython should not used for a regular build Modified: lxml/trunk/doc/build.txt ============================================================================== --- lxml/trunk/doc/build.txt (original) +++ lxml/trunk/doc/build.txt Mon Feb 4 19:35:16 2008 @@ -23,22 +23,31 @@ Cython ------ -The lxml.etree and lxml.objectify modules are written in Cython_. Since we -distribute the Cython-generated .c files with lxml releases, however, you do -not need Cython to build lxml from the normal release sources. - +.. _EasyInstall: http://peak.telecommunity.com/DevCenter/EasyInstall .. _Cython: http://www.cython.org -If you are interested in building lxml from a Subversion checkout or want to -be an lxml developer, you do need a working Cython installation. You can use -EasyInstall_ to install it:: +The lxml.etree and lxml.objectify modules are written in Cython_. +Since we distribute the Cython-generated .c files with lxml releases, +however, you do not need Cython to build lxml from the normal release +sources. We even encourage you to *not install Cython* for a normal +release build, as the generated C code can vary quite heavily between +Cython versions, which may or may not generate correct code for lxml. +The pre-generated release sources were tested and therefore are known +to work. + +So, if you want a reliable build of lxml, we suggest to a) use a +source release of lxml and b) disable or uninstall Cython for the +build. + +*Only* if you are interested in building lxml from a Subversion +checkout (e.g. to test a bug fix that has not been release yet) or if +want to be an lxml developer, then you do need a working Cython +installation. You can use EasyInstall_ to install it:: - easy_install Cython==0.9.6.11 - -.. 
_EasyInstall: http://peak.telecommunity.com/DevCenter/EasyInstall + easy_install Cython==0.9.6.11b -lxml currently requires at least Cython 0.9.6.11, but later versions -should work. +lxml currently requires Cython 0.9.6.11b, later versions were not +tested. Subversion From scoder at codespeak.net Tue Feb 5 20:23:47 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Tue, 5 Feb 2008 20:23:47 +0100 (CET) Subject: [Lxml-checkins] r51305 - in lxml/trunk: . doc Message-ID: <20080205192347.B2C081683D7@codespeak.net> Author: scoder Date: Tue Feb 5 20:23:44 2008 New Revision: 51305 Modified: lxml/trunk/ (props changed) lxml/trunk/doc/build.txt Log: r3411 at delle: sbehnel | 2008-02-05 10:26:58 +0100 link to MacOS-X thread Modified: lxml/trunk/doc/build.txt ============================================================================== --- lxml/trunk/doc/build.txt (original) +++ lxml/trunk/doc/build.txt Tue Feb 5 20:23:44 2008 @@ -180,10 +180,10 @@ ``DYLD_LIBRARY_PATH`` environment variable *at runtime* (i.e. not just at build time). This seems to fix a lot of problems for users. -Alternatively, you can build lxml statically. A way to do this on MS Windows -is described in the next section, but it should be easy to adapt it for -Mac-OS. That way, you can always be sure you use the versions you compiled -lxml with, regardless of the runtime environement. +Please read this thread about `experiences with MacOS-X`_ if you +encounter problems. + +.. _`experiences with MacOS-X`: http://thread.gmane.org/gmane.comp.python.lxml.devel/3290/focus=3290 Static linking on Windows From scoder at codespeak.net Tue Feb 5 20:23:53 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Tue, 5 Feb 2008 20:23:53 +0100 (CET) Subject: [Lxml-checkins] r51306 - in lxml/trunk: . 
doc Message-ID: <20080205192353.7677B1683E1@codespeak.net> Author: scoder Date: Tue Feb 5 20:23:52 2008 New Revision: 51306 Modified: lxml/trunk/ (props changed) lxml/trunk/doc/build.txt Log: r3412 at delle: sbehnel | 2008-02-05 11:25:19 +0100 MacOS doc clarifications Modified: lxml/trunk/doc/build.txt ============================================================================== --- lxml/trunk/doc/build.txt (original) +++ lxml/trunk/doc/build.txt Tue Feb 5 20:23:52 2008 @@ -174,11 +174,17 @@ an older version. The result can be segfaults on this platform that are hard to track down. -To make sure the newer libxml2 and libxslt versions are used -(e.g. those provided by fink or macports), you should add the -directory where you installed the libraries to the -``DYLD_LIBRARY_PATH`` environment variable *at runtime* (i.e. not just -at build time). This seems to fix a lot of problems for users. +To make sure the newer libxml2 and libxslt versions (e.g. those +provided by fink or macports) are used at *build time*, you must take +care that the script ``xslt-config`` is found from the newly installed +version when running the build setup. The system libraries also +provide this script, but the new one must come first in the PATH. + +To make sure the newer libxml2 and libxslt versions are used at +*runtime*, you should add *all* directories where the newer libraries +are installed (i.e. libxml2, libxslt and libexslt) to the +``DYLD_LIBRARY_PATH`` environment variable when you use lxml (i.e. not +only at build time). This seems to fix a lot of problems for users. Please read this thread about `experiences with MacOS-X`_ if you encounter problems. From scoder at codespeak.net Tue Feb 5 20:23:59 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Tue, 5 Feb 2008 20:23:59 +0100 (CET) Subject: [Lxml-checkins] r51307 - in lxml/trunk: . 
doc Message-ID: <20080205192359.1413516844A@codespeak.net> Author: scoder Date: Tue Feb 5 20:23:58 2008 New Revision: 51307 Modified: lxml/trunk/ (props changed) lxml/trunk/doc/build.txt Log: r3413 at delle: sbehnel | 2008-02-05 17:32:39 +0100 MacOS doc clarifications Modified: lxml/trunk/doc/build.txt ============================================================================== --- lxml/trunk/doc/build.txt (original) +++ lxml/trunk/doc/build.txt Tue Feb 5 20:23:58 2008 @@ -187,9 +187,11 @@ only at build time). This seems to fix a lot of problems for users. Please read this thread about `experiences with MacOS-X`_ if you -encounter problems. +encounter problems. It also has a `buildout for lxml`_ that you can +use. .. _`experiences with MacOS-X`: http://thread.gmane.org/gmane.comp.python.lxml.devel/3290/focus=3290 +.. _`buildout for lxml`: http://thread.gmane.org/gmane.comp.python.lxml.devel/3290/focus=3297 Static linking on Windows From scoder at codespeak.net Thu Feb 7 08:17:50 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Thu, 7 Feb 2008 08:17:50 +0100 (CET) Subject: [Lxml-checkins] r51315 - lxml/trunk Message-ID: <20080207071750.09D341684D7@codespeak.net> Author: scoder Date: Thu Feb 7 08:17:48 2008 New Revision: 51315 Modified: lxml/trunk/ (props changed) lxml/trunk/setup.py Log: r3417 at delle: sbehnel | 2008-02-06 10:41:34 +0100 link to project homepage from PyPI Modified: lxml/trunk/setup.py ============================================================================== --- lxml/trunk/setup.py (original) +++ lxml/trunk/setup.py Thu Feb 7 08:17:48 2008 @@ -76,7 +76,8 @@ It extends the ElementTree API significantly to offer support for XPath, RelaxNG, XML Schema, XSLT, C14N and much more. 
-To contact the project, go to the project home page or see our bug tracker at +To contact the project, go to the `project home page +<http://codespeak.net/lxml>`_ or see our bug tracker at https://launchpad.net/lxml In case you want to use the current in-development version of lxml, you can From scoder at codespeak.net Thu Feb 7 08:17:54 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Thu, 7 Feb 2008 08:17:54 +0100 (CET) Subject: [Lxml-checkins] r51316 - in lxml/trunk: . doc Message-ID: <20080207071754.AB19D1684D8@codespeak.net> Author: scoder Date: Thu Feb 7 08:17:53 2008 New Revision: 51316 Modified: lxml/trunk/ (props changed) lxml/trunk/doc/FAQ.txt Log: r3418 at delle: sbehnel | 2008-02-06 10:48:03 +0100 link to Enfold Proxy 4 Modified: lxml/trunk/doc/FAQ.txt ============================================================================== --- lxml/trunk/doc/FAQ.txt (original) +++ lxml/trunk/doc/FAQ.txt Thu Feb 7 08:17:53 2008 @@ -120,6 +120,7 @@ * cssutils_, a CSS parser and toolkit, can be used with ``lxml.cssselect`` * Deliverance_, a content theming tool +* `Enfold Proxy 4`_, a web server accelerator with on-the-fly XSLT processing * Inteproxy_, a secure HTTP proxy * lwebstring_, an XML template engine * OpenXMLlib_, a library for handling OpenXML document meta data @@ -136,6 +137,7 @@ .. _cssutils: http://code.google.com/p/cssutils/source/browse/trunk/examples/style.py?r=917 .. _Deliverance: http://www.openplans.org/projects/deliverance/project-home +.. _`Enfold Proxy 4`: http://www.enfoldsystems.com/Products/Proxy/4 .. _gocept.lxml: http://pypi.python.org/pypi/gocept.lxml .. _Inteproxy: http://lists.wald.intevation.org/pipermail/inteproxy-devel/2007-February/000000.html .. _lwebstring: http://pypi.python.org/pypi/lwebstring From scoder at codespeak.net Thu Feb 7 08:17:58 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Thu, 7 Feb 2008 08:17:58 +0100 (CET) Subject: [Lxml-checkins] r51317 - in lxml/trunk: . 
doc Message-ID: <20080207071758.EB0091684E6@codespeak.net> Author: scoder Date: Thu Feb 7 08:17:58 2008 New Revision: 51317 Modified: lxml/trunk/ (props changed) lxml/trunk/doc/FAQ.txt Log: r3419 at delle: sbehnel | 2008-02-06 17:32:58 +0100 doc beautification Modified: lxml/trunk/doc/FAQ.txt ============================================================================== --- lxml/trunk/doc/FAQ.txt (original) +++ lxml/trunk/doc/FAQ.txt Thu Feb 7 08:17:58 2008 @@ -116,7 +116,7 @@ Also note that the compatibility to the ElementTree library does not require projects to set a hard dependency on lxml - as long as they do -not need lxml's enhanced feature set. +not take advantage of lxml's enhanced feature set. * cssutils_, a CSS parser and toolkit, can be used with ``lxml.cssselect`` * Deliverance_, a content theming tool From scoder at codespeak.net Thu Feb 7 08:18:03 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Thu, 7 Feb 2008 08:18:03 +0100 (CET) Subject: [Lxml-checkins] r51318 - in lxml/trunk: . 
src/lxml/tests Message-ID: <20080207071803.49E961684D8@codespeak.net> Author: scoder Date: Thu Feb 7 08:18:02 2008 New Revision: 51318 Modified: lxml/trunk/ (props changed) lxml/trunk/src/lxml/tests/common_imports.py Log: r3420 at delle: sbehnel | 2008-02-07 08:17:17 +0100 fix ET version comparison Modified: lxml/trunk/src/lxml/tests/common_imports.py ============================================================================== --- lxml/trunk/src/lxml/tests/common_imports.py (original) +++ lxml/trunk/src/lxml/tests/common_imports.py Thu Feb 7 08:18:02 2008 @@ -5,6 +5,15 @@ from lxml import etree +def make_version_tuple(version_string): + l = [] + for part in re.findall('([0-9]+|[^0-9.]+)', version_string): + try: + l.append(int(part)) + except ValueError: + l.append(part) + return tuple(l) + try: from elementtree import ElementTree # standard ET except ImportError: @@ -14,7 +23,7 @@ ElementTree = None if hasattr(ElementTree, 'VERSION'): - if tuple(ElementTree.VERSION.split('.')) < (1,3): + if make_version_tuple(ElementTree.VERSION)[:2] < (1,3): # compatibility tests require ET 1.3+ ElementTree = None @@ -27,8 +36,8 @@ cElementTree = None if hasattr(cElementTree, 'VERSION'): - if tuple(cElementTree.VERSION.split('.')) < (1,0,7): - # compatibility tests require cET 1.0.7+ + if make_version_tuple(cElementTree.VERSION)[:2] <= (1,0): + # compatibility tests do not run with cET 1.0.7 cElementTree = None try: From scoder at codespeak.net Fri Feb 8 09:12:30 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 8 Feb 2008 09:12:30 +0100 (CET) Subject: [Lxml-checkins] r51326 - in lxml/trunk: . 
src/lxml Message-ID: <20080208081230.724991684C3@codespeak.net> Author: scoder Date: Fri Feb 8 09:12:28 2008 New Revision: 51326 Modified: lxml/trunk/ (props changed) lxml/trunk/src/lxml/lxml.etree.pyx Log: r3425 at delle: sbehnel | 2008-02-07 19:11:24 +0100 removed left-over method Modified: lxml/trunk/src/lxml/lxml.etree.pyx ============================================================================== --- lxml/trunk/src/lxml/lxml.etree.pyx (original) +++ lxml/trunk/src/lxml/lxml.etree.pyx Fri Feb 8 09:12:28 2008 @@ -1271,9 +1271,6 @@ def __setitem__(self, index, value): self._raiseImmutable() - def __setslice__(self, start, end, value): - self._raiseImmutable() - property attrib: def __get__(self): return {} From scoder at codespeak.net Fri Feb 8 09:12:34 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 8 Feb 2008 09:12:34 +0100 (CET) Subject: [Lxml-checkins] r51327 - in lxml/trunk: . src/lxml/tests Message-ID: <20080208081234.290541684CF@codespeak.net> Author: scoder Date: Fri Feb 8 09:12:33 2008 New Revision: 51327 Modified: lxml/trunk/ (props changed) lxml/trunk/src/lxml/tests/test_elementtree.py Log: r3426 at delle: sbehnel | 2008-02-08 09:10:11 +0100 new test case for item assignment with namespaces Modified: lxml/trunk/src/lxml/tests/test_elementtree.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_elementtree.py (original) +++ lxml/trunk/src/lxml/tests/test_elementtree.py Fri Feb 8 09:12:33 2008 @@ -2005,6 +2005,28 @@ self.assertEquals( [d, c, b], list(a)) + + def test_setslice_all_replace_reversed_ns(self): + Element = self.etree.Element + SubElement = self.etree.SubElement + + a = Element('{ns}a') + b = SubElement(a, '{ns}b', {'{ns1}a1': 'test'}) + c = SubElement(a, '{ns}c', {'{ns2}a2': 'test'}) + d = SubElement(a, '{ns}d', {'{ns3}a3': 'test'}) + + s = [d, c, b] + a[:] = s + self.assertEquals( + [d, c, b], + list(a)) + self.assertEquals( + ['{ns}d', '{ns}c', 
'{ns}b'], + [ child.tag for child in a ]) + + self.assertEquals( + [['{ns3}a3'], ['{ns2}a2'], ['{ns1}a1']], + [ child.attrib.keys() for child in a ]) def test_setslice_end(self): Element = self.etree.Element From scoder at codespeak.net Fri Feb 8 09:12:38 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 8 Feb 2008 09:12:38 +0100 (CET) Subject: [Lxml-checkins] r51328 - in lxml/trunk: . src/lxml Message-ID: <20080208081238.C868C1684DE@codespeak.net> Author: scoder Date: Fri Feb 8 09:12:38 2008 New Revision: 51328 Modified: lxml/trunk/ (props changed) lxml/trunk/src/lxml/lxml.objectify.pyx Log: r3427 at delle: sbehnel | 2008-02-08 09:11:45 +0100 rewrite of OE.__setitem__() in objectify: copy all elements *before* setting the slice Modified: lxml/trunk/src/lxml/lxml.objectify.pyx ============================================================================== --- lxml/trunk/src/lxml/lxml.objectify.pyx (original) +++ lxml/trunk/src/lxml/lxml.objectify.pyx Fri Feb 8 09:12:38 2008 @@ -269,7 +269,8 @@ def __setitem__(self, key, value): """Set the value of a sibling, counting from the first child of the - parent. + parent. Implements key assignment, item assignment and slice + assignment. * If argument is an integer, sets the sibling at that position. @@ -280,12 +281,7 @@ items to the siblings. """ cdef _Element element - cdef _Element parent - cdef _Element new_element - cdef tree.xmlNode* c_self_node - cdef tree.xmlNode* c_parent cdef tree.xmlNode* c_node - cdef Py_ssize_t start, stop, step, slicelength if python._isString(key): key = _buildChildTag(self, key) element = _lookupChild(self, key) @@ -295,48 +291,21 @@ _replaceElement(element, value) return - c_self_node = self._c_node - c_parent = c_self_node.parent - if c_parent is NULL: + if self._c_node.parent is NULL: # the 'root[i] = ...' 
case raise TypeError("assignment to root element is invalid") if python.PySlice_Check(key): # slice assignment - python.PySlice_GetIndicesEx( - key, _countSiblings(self._c_node), - &start, &stop, &step, &slicelength) - # replace existing items - new_items = iter(value) - if step < 0: - del_items = list(self)[start:stop:step] - else: - del_items = list(islice(self, start, stop, step)) - del_items = iter(del_items) - parent = self.getparent() - try: - for el in del_items: - item = new_items.next() - _replaceElement(el, item) - except StopIteration: - remove = parent.remove - remove(el) - for el in del_items: - remove(el) - return - else: - # append remaining new items - tag = self.tag - for item in new_items: - _appendValue(parent, tag, item) + _setSlice(key, self, value) else: # normal index assignment if key < 0: - c_node = c_parent.last + c_node = self._c_node.parent.last else: - c_node = c_parent.children + c_node = self._c_node.parent.children c_node = _findFollowingSibling( - c_node, tree._getNs(c_self_node), c_self_node.name, key) + c_node, tree._getNs(self._c_node), self._c_node.name, key) if c_node is NULL: raise IndexError(key) element = elementFactory(self._doc, c_node) @@ -538,6 +507,45 @@ PYTYPE_ATTRIBUTE_NAME) cetree.setNodeText(element._c_node, value) +cdef _setSlice(slice, _Element target, items): + cdef _Element parent + # collect new values + new_items = [] + tag = target.tag + for item in items: + if isinstance(item, _Element): + # deep copy the new element + new_element = cetree.deepcopyNodeToDocument( + target._doc, (<_Element>item)._c_node) + new_element.tag = tag + python.PyList_Append(new_items, new_element) + else: + new_element = cetree.makeElement( + tag, target._doc, None, None, None, None, None) + _setElementValue(new_element, item) + python.PyList_Append(new_items, new_element) + + # replace existing items + new_items = iter(new_items) + del_items = iter(target[slice]) + parent = target.getparent() + try: + next_item = new_items.next + 
replace = parent.replace + for el in del_items: + item = next_item() + replace(el, item) + except StopIteration: + remove = parent.remove + remove(el) + for el in del_items: + remove(el) + return + else: + # append remaining new items + for item in new_items: + _appendValue(parent, tag, item) + ################################################################################ # Data type support in subclasses From scoder at codespeak.net Fri Feb 8 15:32:29 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 8 Feb 2008 15:32:29 +0100 (CET) Subject: [Lxml-checkins] r51335 - in lxml/trunk: . src/lxml/tests Message-ID: <20080208143229.47183168534@codespeak.net> Author: scoder Date: Fri Feb 8 15:32:26 2008 New Revision: 51335 Modified: lxml/trunk/ (props changed) lxml/trunk/src/lxml/tests/test_objectify.py Log: r3431 at delle: sbehnel | 2008-02-08 15:31:54 +0100 slicing tests for objectify Modified: lxml/trunk/src/lxml/tests/test_objectify.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_objectify.py (original) +++ lxml/trunk/src/lxml/tests/test_objectify.py Fri Feb 8 15:32:26 2008 @@ -427,7 +427,60 @@ self.assertRaises(TypeError, setattr, root.c1.c2, 'text', "test") self.assertRaises(TypeError, setattr, root.c1.c2, 'pyval', "test") - def test_setslice(self): + # slicing + + def test_getslice_complete(self): + root = self.XML("c1c2") + self.assertEquals(["c1", "c2"], + [ c.text for c in root.c[:] ]) + + def test_getslice_partial(self): + root = self.XML("c1c2c3c4") + test_list = ["c1", "c2", "c3", "c4"] + + self.assertEquals(test_list, + [ c.text for c in root.c[:] ]) + self.assertEquals(test_list[1:2], + [ c.text for c in root.c[1:2] ]) + self.assertEquals(test_list[-3:-1], + [ c.text for c in root.c[-3:-1] ]) + self.assertEquals(test_list[-3:3], + [ c.text for c in root.c[-3:3] ]) + self.assertEquals(test_list[-3000:3], + [ c.text for c in root.c[-3000:3] ]) + 
self.assertEquals(test_list[-3:3000], + [ c.text for c in root.c[-3:3000] ]) + + def test_getslice_partial_neg(self): + root = self.XML("c1c2c3c4") + test_list = ["c1", "c2", "c3", "c4"] + + self.assertEquals(test_list, + [ c.text for c in root.c[:] ]) + self.assertEquals(test_list[2:1:-1], + [ c.text for c in root.c[2:1:-1] ]) + self.assertEquals(test_list[-1:-3:-1], + [ c.text for c in root.c[-1:-3:-1] ]) + self.assertEquals(test_list[2:-3:-1], + [ c.text for c in root.c[2:-3:-1] ]) + self.assertEquals(test_list[2:-3000:-1], + [ c.text for c in root.c[2:-3000:-1] ]) + + + def test_setslice_complete(self): + Element = self.Element + SubElement = self.etree.SubElement + root = Element("root") + root.c = ["c1", "c2"] + + c1 = root.c[0] + c2 = root.c[1] + + self.assertEquals([c1,c2], list(root.c)) + self.assertEquals(["c1", "c2"], + [ c.text for c in root.c ]) + + def test_setslice_elements(self): Element = self.Element SubElement = self.etree.SubElement root = Element("root") @@ -455,6 +508,52 @@ self.assertEquals(["c1", "c2", "c2", "c1"], [ c.text for c in root.c ]) + def test_setslice_partial(self): + Element = self.Element + SubElement = self.etree.SubElement + root = Element("root") + l = ["c1", "c2", "c3", "c4"] + root.c = l + + self.assertEquals(["c1", "c2", "c3", "c4"], + [ c.text for c in root.c ]) + self.assertEquals(l, + [ c.text for c in root.c ]) + + new_slice = ["cA", "cB"] + l[1:2] = new_slice + root.c[1:2] = new_slice + + self.assertEquals(["c1", "cA", "cB", "c3", "c4"], l) + self.assertEquals(["c1", "cA", "cB", "c3", "c4"], + [ c.text for c in root.c ]) + self.assertEquals(l, + [ c.text for c in root.c ]) + + def test_setslice_partial_neg(self): + Element = self.Element + SubElement = self.etree.SubElement + root = Element("root") + l = ["c1", "c2", "c3", "c4"] + root.c = l + + self.assertEquals(["c1", "c2", "c3", "c4"], + [ c.text for c in root.c ]) + self.assertEquals(l, + [ c.text for c in root.c ]) + + new_slice = ["cA", "cB"] + l[-1:1:-1] = 
new_slice + root.c[-1:1:-1] = new_slice + + self.assertEquals(["c1", "c2", "cB", "cA", "c4"], l) + self.assertEquals(["c1", "c2", "cB", "cA", "c4"], + [ c.text for c in root.c ]) + self.assertEquals(l, + [ c.text for c in root.c ]) + + # other stuff + def test_set_string(self): # make sure strings are not handled as sequences Element = self.Element From scoder at codespeak.net Fri Feb 8 15:39:50 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 8 Feb 2008 15:39:50 +0100 (CET) Subject: [Lxml-checkins] r51336 - in lxml/trunk: . src/lxml/tests Message-ID: <20080208143950.08E31168534@codespeak.net> Author: scoder Date: Fri Feb 8 15:39:50 2008 New Revision: 51336 Modified: lxml/trunk/ (props changed) lxml/trunk/src/lxml/tests/test_objectify.py Log: r3433 at delle: sbehnel | 2008-02-08 15:38:06 +0100 comment Modified: lxml/trunk/src/lxml/tests/test_objectify.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_objectify.py (original) +++ lxml/trunk/src/lxml/tests/test_objectify.py Fri Feb 8 15:39:50 2008 @@ -466,6 +466,7 @@ self.assertEquals(test_list[2:-3000:-1], [ c.text for c in root.c[2:-3000:-1] ]) + # slice assignment def test_setslice_complete(self): Element = self.Element From scoder at codespeak.net Fri Feb 8 15:39:59 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 8 Feb 2008 15:39:59 +0100 (CET) Subject: [Lxml-checkins] r51337 - in lxml/trunk: . 
src/lxml Message-ID: <20080208143959.B153B168535@codespeak.net> Author: scoder Date: Fri Feb 8 15:39:54 2008 New Revision: 51337 Modified: lxml/trunk/ (props changed) lxml/trunk/src/lxml/lxml.objectify.pyx Log: r3434 at delle: sbehnel | 2008-02-08 15:39:12 +0100 fix objectify slicing for negative start/stop/step Modified: lxml/trunk/src/lxml/lxml.objectify.pyx ============================================================================== --- lxml/trunk/src/lxml/lxml.objectify.pyx (original) +++ lxml/trunk/src/lxml/lxml.objectify.pyx Fri Feb 8 15:39:54 2008 @@ -242,13 +242,7 @@ if python._isString(key): return _lookupChildOrRaise(self, key) elif python.PySlice_Check(key): - python.PySlice_GetIndicesEx( - key, _countSiblings(self._c_node), - &start, &stop, &step, &slicelength) - if step < 0: - return list(self)[start:stop:step] - else: - return list(islice(self, start, stop, step)) + return list(self)[key] # normal item access c_self_node = self._c_node c_parent = c_self_node.parent From scoder at codespeak.net Fri Feb 8 19:44:08 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 8 Feb 2008 19:44:08 +0100 (CET) Subject: [Lxml-checkins] r51340 - in lxml/trunk: . 
src/lxml/tests Message-ID: <20080208184408.7A76016852B@codespeak.net> Author: scoder Date: Fri Feb 8 19:44:06 2008 New Revision: 51340 Modified: lxml/trunk/ (props changed) lxml/trunk/src/lxml/tests/test_objectify.py Log: r3437 at delle: sbehnel | 2008-02-08 19:41:02 +0100 more slicing tests for objectify Modified: lxml/trunk/src/lxml/tests/test_objectify.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_objectify.py (original) +++ lxml/trunk/src/lxml/tests/test_objectify.py Fri Feb 8 19:44:06 2008 @@ -470,7 +470,6 @@ def test_setslice_complete(self): Element = self.Element - SubElement = self.etree.SubElement root = Element("root") root.c = ["c1", "c2"] @@ -483,7 +482,6 @@ def test_setslice_elements(self): Element = self.Element - SubElement = self.etree.SubElement root = Element("root") root.c = ["c1", "c2"] @@ -511,7 +509,6 @@ def test_setslice_partial(self): Element = self.Element - SubElement = self.etree.SubElement root = Element("root") l = ["c1", "c2", "c3", "c4"] root.c = l @@ -531,9 +528,77 @@ self.assertEquals(l, [ c.text for c in root.c ]) + def test_setslice_insert(self): + Element = self.Element + root = Element("root") + l = ["c1", "c2", "c3", "c4"] + root.c = l + + self.assertEquals(["c1", "c2", "c3", "c4"], + [ c.text for c in root.c ]) + self.assertEquals(l, + [ c.text for c in root.c ]) + + new_slice = ["cA", "cB"] + l[1:1] = new_slice + root.c[1:1] = new_slice + + self.assertEquals(["c1", "cA", "cB", "c2", "c3", "c4"], l) + self.assertEquals(["c1", "cA", "cB", "c2", "c3", "c4"], + [ c.text for c in root.c ]) + self.assertEquals(l, + [ c.text for c in root.c ]) + + def test_setslice_insert_neg(self): + Element = self.Element + root = Element("root") + l = ["c1", "c2", "c3", "c4"] + root.c = l + + self.assertEquals(["c1", "c2", "c3", "c4"], + [ c.text for c in root.c ]) + self.assertEquals(l, + [ c.text for c in root.c ]) + + new_slice = ["cA", "cB"] + l[-2:-2] = new_slice + 
root.c[-2:-2] = new_slice + + self.assertEquals(["c1", "c2", "cA", "cB", "c3", "c4"], l) + self.assertEquals(["c1", "c2", "cA", "cB", "c3", "c4"], + [ c.text for c in root.c ]) + self.assertEquals(l, + [ c.text for c in root.c ]) + + def test_setslice_empty(self): + Element = self.Element + root = Element("root") + + root.c = [] + self.assertRaises( + AttributeError, getattr, root, 'c') + + def test_setslice_partial_wrong_length(self): + Element = self.Element + root = Element("root") + l = ["c1", "c2", "c3", "c4"] + root.c = l + + self.assertEquals(["c1", "c2", "c3", "c4"], + [ c.text for c in root.c ]) + self.assertEquals(l, + [ c.text for c in root.c ]) + + new_slice = ["cA", "cB", "cC"] + self.assertRaises( + ValueError, operator.setitem, + l, slice(1,2,-1), new_slice) + self.assertRaises( + ValueError, operator.setitem, + root.c, slice(1,2,-1), new_slice) + def test_setslice_partial_neg(self): Element = self.Element - SubElement = self.etree.SubElement root = Element("root") l = ["c1", "c2", "c3", "c4"] root.c = l @@ -547,8 +612,29 @@ l[-1:1:-1] = new_slice root.c[-1:1:-1] = new_slice - self.assertEquals(["c1", "c2", "cB", "cA", "c4"], l) - self.assertEquals(["c1", "c2", "cB", "cA", "c4"], + self.assertEquals(["c1", "c2", "cB", "cA"], l) + self.assertEquals(["c1", "c2", "cB", "cA"], + [ c.text for c in root.c ]) + self.assertEquals(l, + [ c.text for c in root.c ]) + + def test_setslice_partial_allneg(self): + Element = self.Element + root = Element("root") + l = ["c1", "c2", "c3", "c4"] + root.c = l + + self.assertEquals(["c1", "c2", "c3", "c4"], + [ c.text for c in root.c ]) + self.assertEquals(l, + [ c.text for c in root.c ]) + + new_slice = ["cA", "cB"] + l[-1:-4:-2] = new_slice + root.c[-1:-4:-2] = new_slice + + self.assertEquals(["c1", "cB", "c3", "cA"], l) + self.assertEquals(["c1", "cB", "c3", "cA"], [ c.text for c in root.c ]) self.assertEquals(l, [ c.text for c in root.c ]) @@ -558,7 +644,6 @@ def test_set_string(self): # make sure strings are not 
handled as sequences Element = self.Element - SubElement = self.etree.SubElement root = Element("root") root.c = "TEST" self.assertEquals(["TEST"], @@ -567,7 +652,6 @@ def test_setitem_string(self): # make sure strings are set as children Element = self.Element - SubElement = self.etree.SubElement root = Element("root") root["c"] = "TEST" self.assertEquals(["TEST"], @@ -576,7 +660,6 @@ def test_setitem_string_special(self): # make sure 'text' etc. are set as children Element = self.Element - SubElement = self.etree.SubElement root = Element("root") root["text"] = "TEST" From scoder at codespeak.net Fri Feb 8 19:44:12 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 8 Feb 2008 19:44:12 +0100 (CET) Subject: [Lxml-checkins] r51341 - in lxml/trunk: . src/lxml Message-ID: <20080208184412.DCC4C168531@codespeak.net> Author: scoder Date: Fri Feb 8 19:44:11 2008 New Revision: 51341 Modified: lxml/trunk/ (props changed) lxml/trunk/src/lxml/lxml.objectify.pyx Log: r3438 at delle: sbehnel | 2008-02-08 19:43:35 +0100 another rewrite of objectify._setSlice() to fix the handling of slice-overlapping elements Modified: lxml/trunk/src/lxml/lxml.objectify.pyx ============================================================================== --- lxml/trunk/src/lxml/lxml.objectify.pyx (original) +++ lxml/trunk/src/lxml/lxml.objectify.pyx Fri Feb 8 19:44:11 2008 @@ -503,6 +503,17 @@ cdef _setSlice(slice, _Element target, items): cdef _Element parent + cdef tree.xmlNode* c_node + cdef Py_ssize_t c_step, c_start, pos + # collect existing slice + if (slice).step is None: + c_step = 1 + else: + c_step = (slice).step + if c_step == 0: + raise ValueError("Invalid slice") + del_items = target[slice] + # collect new values new_items = [] tag = target.tag @@ -512,33 +523,58 @@ new_element = cetree.deepcopyNodeToDocument( target._doc, (<_Element>item)._c_node) new_element.tag = tag - python.PyList_Append(new_items, new_element) else: new_element = cetree.makeElement( tag, 
target._doc, None, None, None, None, None) _setElementValue(new_element, item) - python.PyList_Append(new_items, new_element) + python.PyList_Append(new_items, new_element) + + # sanity check - raise what a list would raise + if c_step != 1 and \ + python.PyList_GET_SIZE(del_items) != python.PyList_GET_SIZE(new_items): + raise ValueError( + "attempt to assign sequence of size %d to extended slice of size %d" % ( + python.PyList_GET_SIZE(new_items), + python.PyList_GET_SIZE(del_items))) # replace existing items - new_items = iter(new_items) - del_items = iter(target[slice]) + pos = 0 parent = target.getparent() - try: - next_item = new_items.next - replace = parent.replace - for el in del_items: - item = next_item() - replace(el, item) - except StopIteration: + replace = parent.replace + while pos < python.PyList_GET_SIZE(new_items) and \ + pos < python.PyList_GET_SIZE(del_items): + replace(del_items[pos], new_items[pos]) + pos += 1 + # remove leftover items + if pos < python.PyList_GET_SIZE(del_items): remove = parent.remove - remove(el) - for el in del_items: - remove(el) - return - else: - # append remaining new items - for item in new_items: - _appendValue(parent, tag, item) + while pos < python.PyList_GET_SIZE(del_items): + remove(del_items[pos]) + pos += 1 + # append remaining new items + if pos < python.PyList_GET_SIZE(new_items): + # the sanity check above guarantees (step == 1) + if pos > 0: + item = new_items[pos-1] + else: + if (slice).start > 0: + c_node = parent._c_node.children + else: + c_node = parent._c_node.last + c_node = _findFollowingSibling( + c_node, tree._getNs(target._c_node), target._c_node.name, + (slice).start - 1) + if c_node is NULL: + while pos < python.PyList_GET_SIZE(new_items): + cetree.appendChild(parent, new_items[pos]) + pos += 1 + return + item = cetree.elementFactory(parent._doc, c_node) + while pos < python.PyList_GET_SIZE(new_items): + add = item.addnext + item = new_items[pos] + add(item) + pos += 1 
################################################################################ # Data type support in subclasses From scoder at codespeak.net Fri Feb 8 19:51:09 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 8 Feb 2008 19:51:09 +0100 (CET) Subject: [Lxml-checkins] r51342 - lxml/trunk Message-ID: <20080208185109.54A9816852B@codespeak.net> Author: scoder Date: Fri Feb 8 19:51:08 2008 New Revision: 51342 Modified: lxml/trunk/ (props changed) lxml/trunk/CHANGES.txt Log: r3441 at delle: sbehnel | 2008-02-08 19:47:51 +0100 changelog Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Fri Feb 8 19:51:08 2008 @@ -2,6 +2,22 @@ lxml changelog ============== +2.0.1 (Under development) +========================= + +Features added +-------------- + +Bugs fixed +---------- + +* Setting an element slice in objectify could insert overlapping + elements at the wrong position. + +Other changes +------------- + + 2.0 (2008-02-01) ================ From scoder at codespeak.net Fri Feb 8 19:51:12 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 8 Feb 2008 19:51:12 +0100 (CET) Subject: [Lxml-checkins] r51343 - lxml/trunk Message-ID: <20080208185112.49FA216852E@codespeak.net> Author: scoder Date: Fri Feb 8 19:51:11 2008 New Revision: 51343 Modified: lxml/trunk/ (props changed) lxml/trunk/version.txt Log: r3442 at delle: sbehnel | 2008-02-08 19:48:15 +0100 version Modified: lxml/trunk/version.txt ============================================================================== --- lxml/trunk/version.txt (original) +++ lxml/trunk/version.txt Fri Feb 8 19:51:11 2008 @@ -1 +1 @@ -2.0 +2.0.1 From scoder at codespeak.net Fri Feb 8 19:51:16 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 8 Feb 2008 19:51:16 +0100 (CET) Subject: [Lxml-checkins] r51344 - in lxml/trunk: . 
doc Message-ID: <20080208185116.C1BAB168531@codespeak.net> Author: scoder Date: Fri Feb 8 19:51:16 2008 New Revision: 51344 Modified: lxml/trunk/ (props changed) lxml/trunk/doc/main.txt Log: r3443 at delle: sbehnel | 2008-02-08 19:50:40 +0100 version Modified: lxml/trunk/doc/main.txt ============================================================================== --- lxml/trunk/doc/main.txt (original) +++ lxml/trunk/doc/main.txt Fri Feb 8 19:51:16 2008 @@ -145,8 +145,8 @@ .. _`lxml at the Python Package Index`: http://pypi.python.org/pypi/lxml/ .. _`this key`: pubkey.asc -The latest version is `lxml 2.0`_, released 2008-02-01 -(`changes for 2.0`_). `Older versions`_ are listed below. +The latest version is `lxml 2.0.1`_, released 2008-02-08 +(`changes for 2.0.1`_). `Older versions`_ are listed below. .. _`Older versions`: #old-versions @@ -206,21 +206,7 @@ Old Versions ------------ -* `lxml 2.0beta2`_, released 2008-01-26 (`changes for 2.0beta2`_) - -* `lxml 2.0beta1`_, released 2008-01-11 (`changes for 2.0beta1`_) - -* `lxml 2.0alpha6`_, released 2007-12-19 (`changes for 2.0alpha6`_) - -* `lxml 2.0alpha5`_, released 2007-11-24 (`changes for 2.0alpha5`_) - -* `lxml 2.0alpha4`_, released 2007-10-07 (`changes for 2.0alpha4`_) - -* `lxml 2.0alpha3`_, released 2007-09-26 (`changes for 2.0alpha3`_) - -* `lxml 2.0alpha2`_, released 2007-09-15 (`changes for 2.0alpha2`_) - -* `lxml 2.0alpha1`_, released 2007-09-02 (`changes for 2.0alpha1`_) +* `lxml 2.0`_, released 2008-02-01 (`changes for 2.0`_) * `lxml 1.3.6`_, released 2007-10-29 (`changes for 1.3.6`_) @@ -272,15 +258,8 @@ * `lxml 0.5`_, released 2005-04-08 +.. _`lxml 2.0.1`: lxml-2.0.1.tgz .. _`lxml 2.0`: lxml-2.0.tgz -.. _`lxml 2.0beta2`: lxml-2.0beta2.tgz -.. _`lxml 2.0beta1`: lxml-2.0beta1.tgz -.. _`lxml 2.0alpha6`: lxml-2.0alpha6.tgz -.. _`lxml 2.0alpha5`: lxml-2.0alpha5.tgz -.. _`lxml 2.0alpha4`: lxml-2.0alpha4.tgz -.. _`lxml 2.0alpha3`: lxml-2.0alpha3.tgz -.. _`lxml 2.0alpha2`: lxml-2.0alpha2.tgz -.. 
_`lxml 2.0alpha1`: lxml-2.0alpha1.tgz .. _`lxml 1.3.6`: lxml-1.3.6.tgz .. _`lxml 1.3.5`: lxml-1.3.5.tgz .. _`lxml 1.3.4`: lxml-1.3.4.tgz @@ -306,15 +285,8 @@ .. _`lxml 0.5.1`: lxml-0.5.1.tgz .. _`lxml 0.5`: lxml-0.5.tgz +.. _`changes for 2.0.1`: changes-2.0.1.html .. _`changes for 2.0`: changes-2.0.html -.. _`changes for 2.0beta2`: changes-2.0beta2.html -.. _`changes for 2.0beta1`: changes-2.0beta1.html -.. _`changes for 2.0alpha6`: changes-2.0alpha6.html -.. _`changes for 2.0alpha5`: changes-2.0alpha5.html -.. _`changes for 2.0alpha4`: changes-2.0alpha4.html -.. _`changes for 2.0alpha3`: changes-2.0alpha3.html -.. _`changes for 2.0alpha2`: changes-2.0alpha2.html -.. _`changes for 2.0alpha1`: changes-2.0alpha1.html .. _`changes for 1.3.6`: changes-1.3.6.html .. _`changes for 1.3.5`: changes-1.3.5.html .. _`changes for 1.3.4`: changes-1.3.4.html From scoder at codespeak.net Fri Feb 8 20:39:59 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 8 Feb 2008 20:39:59 +0100 (CET) Subject: [Lxml-checkins] r51346 - in lxml/trunk: . 
src/lxml/tests Message-ID: <20080208193959.AA98516853D@codespeak.net> Author: scoder Date: Fri Feb 8 20:39:58 2008 New Revision: 51346 Modified: lxml/trunk/ (props changed) lxml/trunk/src/lxml/tests/test_elementtree.py Log: r3447 at delle: sbehnel | 2008-02-08 20:00:25 +0100 new test case for setting a reversed slice with namespaces Modified: lxml/trunk/src/lxml/tests/test_elementtree.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_elementtree.py (original) +++ lxml/trunk/src/lxml/tests/test_elementtree.py Fri Feb 8 20:39:58 2008 @@ -2005,8 +2005,8 @@ self.assertEquals( [d, c, b], list(a)) - - def test_setslice_all_replace_reversed_ns(self): + + def test_setslice_all_replace_reversed_ns1(self): Element = self.etree.Element SubElement = self.etree.SubElement @@ -2028,6 +2028,28 @@ [['{ns3}a3'], ['{ns2}a2'], ['{ns1}a1']], [ child.attrib.keys() for child in a ]) + def test_setslice_all_replace_reversed_ns2(self): + Element = self.etree.Element + SubElement = self.etree.SubElement + + a = Element('{ns}a') + b = SubElement(a, '{ns1}b', {'{ns}a1': 'test'}) + c = SubElement(a, '{ns2}c', {'{ns}a2': 'test'}) + d = SubElement(a, '{ns3}d', {'{ns}a3': 'test'}) + + s = [d, c, b] + a[:] = s + self.assertEquals( + [d, c, b], + list(a)) + self.assertEquals( + ['{ns3}d', '{ns2}c', '{ns1}b'], + [ child.tag for child in a ]) + + self.assertEquals( + [['{ns}a3'], ['{ns}a2'], ['{ns}a1']], + [ child.attrib.keys() for child in a ]) + def test_setslice_end(self): Element = self.etree.Element SubElement = self.etree.SubElement From scoder at codespeak.net Fri Feb 8 20:40:03 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 8 Feb 2008 20:40:03 +0100 (CET) Subject: [Lxml-checkins] r51347 - lxml/trunk Message-ID: <20080208194003.AF49A16853E@codespeak.net> Author: scoder Date: Fri Feb 8 20:40:02 2008 New Revision: 51347 Modified: lxml/trunk/ (props changed) lxml/trunk/CHANGES.txt Log: r3448 at delle: 
sbehnel | 2008-02-08 20:37:40 +0100 changelog Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Fri Feb 8 20:40:02 2008 @@ -11,7 +11,7 @@ Bugs fixed ---------- -* Setting an element slice in objectify could insert overlapping +* Setting an element slice in objectify could insert slice-overlapping elements at the wrong position. Other changes From scoder at codespeak.net Fri Feb 8 20:40:07 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 8 Feb 2008 20:40:07 +0100 (CET) Subject: [Lxml-checkins] r51348 - lxml/trunk Message-ID: <20080208194007.512EB16853F@codespeak.net> Author: scoder Date: Fri Feb 8 20:40:06 2008 New Revision: 51348 Modified: lxml/trunk/ (props changed) lxml/trunk/setupinfo.py Log: r3449 at delle: sbehnel | 2008-02-08 20:38:10 +0100 support overriding xslt-config in setup.py: --with-xslt-config=XXX Modified: lxml/trunk/setupinfo.py ============================================================================== --- lxml/trunk/setupinfo.py (original) +++ lxml/trunk/setupinfo.py Fri Feb 8 20:40:06 2008 @@ -81,7 +81,7 @@ return static_library_dirs # filter them from xslt-config --libs result = [] - possible_library_dirs = flags('xslt-config --libs') + possible_library_dirs = flags('libs') for possible_library_dir in possible_library_dirs: if possible_library_dir.startswith('-L'): result.append(possible_library_dir[2:]) @@ -95,7 +95,7 @@ return static_include_dirs # filter them from xslt-config --cflags result = [] - possible_include_dirs = flags('xslt-config --cflags') + possible_include_dirs = flags('cflags') for possible_include_dir in possible_include_dirs: if possible_include_dir.startswith('-I'): result.append(possible_include_dir[2:]) @@ -114,7 +114,7 @@ return result # anything from xslt-config --cflags that doesn't start with -I - possible_cflags = flags('xslt-config --cflags') + possible_cflags = 
flags('cflags') for possible_cflag in possible_cflags: if not possible_cflag.startswith('-I'): result.append(possible_cflag) @@ -127,8 +127,9 @@ if OPTION_WITHOUT_THREADING: macros.append(('WITHOUT_THREADING', None)) return macros - -def flags(cmd): + +def flags(option): + cmd = "%s --%s" % (find_xslt_config(), option) try: import subprocess except ImportError: @@ -145,6 +146,22 @@ print("** make sure the development packages of libxml2 and libxslt are installed **\n") return str(rf.read()).split() +XSLT_CONFIG = None + +def find_xslt_config(): + global XSLT_CONFIG + if XSLT_CONFIG: + return XSLT_CONFIG + option = '--with-xslt-config=' + for arg in sys.argv: + if arg.startswith(option): + sys.argv.remove(arg) + XSLT_CONFIG = arg[len(option):] + return XSLT_CONFIG + else: + XSLT_CONFIG = 'xslt-config' + return XSLT_CONFIG + def has_option(name): try: sys.argv.remove('--%s' % name) From scoder at codespeak.net Fri Feb 8 20:43:39 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 8 Feb 2008 20:43:39 +0100 (CET) Subject: [Lxml-checkins] r51349 - in lxml/trunk: . doc Message-ID: <20080208194339.1B48F16853D@codespeak.net> Author: scoder Date: Fri Feb 8 20:43:38 2008 New Revision: 51349 Modified: lxml/trunk/ (props changed) lxml/trunk/doc/build.txt Log: r3453 at delle: sbehnel | 2008-02-08 20:43:13 +0100 mention new --with-xslt-config option in build docs Modified: lxml/trunk/doc/build.txt ============================================================================== --- lxml/trunk/doc/build.txt (original) +++ lxml/trunk/doc/build.txt Fri Feb 8 20:43:38 2008 @@ -88,9 +88,13 @@ If you get errors about missing header files (e.g. ``libxml/xmlversion.h``) then you need to make sure the development packages of both libxml2 -and libxslt are properly installed. If this doesn't help, you may -have to add the location of the header files to the include path -like:: +and libxslt are properly installed. 
Try passing the following option to +setup.py to make sure the right config is found:: + + python setup.py build --with-xslt-config=/path/to/xslt-config + +If this doesn't help, you may have to add the location of the header +files to the include path like:: python setup.py build_ext -i -I /usr/include/libxml2 @@ -178,7 +182,11 @@ provided by fink or macports) are used at *build time*, you must take care that the script ``xslt-config`` is found from the newly installed version when running the build setup. The system libraries also -provide this script, but the new one must come first in the PATH. +provide this script, but the new one must come first in the PATH. The +best way to make sure the right version is used is by passing the path +to the script as an option to setup.py:: + + python setup.py build --with-xslt-config=/path/to/xslt-config To make sure the newer libxml2 and libxslt versions are used at *runtime*, you should add *all* directories where the newer libraries From scoder at codespeak.net Fri Feb 8 20:45:10 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 8 Feb 2008 20:45:10 +0100 (CET) Subject: [Lxml-checkins] r51350 - lxml/trunk Message-ID: <20080208194510.BF95F16853D@codespeak.net> Author: scoder Date: Fri Feb 8 20:45:10 2008 New Revision: 51350 Modified: lxml/trunk/ (props changed) lxml/trunk/CHANGES.txt Log: r3455 at delle: sbehnel | 2008-02-08 20:44:45 +0100 changelog Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Fri Feb 8 20:45:10 2008 @@ -17,6 +17,10 @@ Other changes ------------- +* Passing ``--with-xslt-config=/path/to/xslt-config`` to setup.py will + override the ``xslt-config`` script that is used to determine the C + compiler options. 
+ 2.0 (2008-02-01) ================ From scoder at codespeak.net Sat Feb 9 18:31:28 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sat, 9 Feb 2008 18:31:28 +0100 (CET) Subject: [Lxml-checkins] r51361 - in lxml/trunk: . doc Message-ID: <20080209173128.5F1BD1684EF@codespeak.net> Author: scoder Date: Sat Feb 9 18:31:26 2008 New Revision: 51361 Modified: lxml/trunk/ (props changed) lxml/trunk/doc/performance.txt Log: r3457 at delle: sbehnel | 2008-02-09 18:30:53 +0100 updated benchmark results Modified: lxml/trunk/doc/performance.txt ============================================================================== --- lxml/trunk/doc/performance.txt (original) +++ lxml/trunk/doc/performance.txt Sat Feb 9 18:31:26 2008 @@ -71,8 +71,8 @@ a specific part of the API yourself, please consider sending it to the lxml mailing list. -The timings cited below compare lxml 2.0alpha (with libxml2 2.6.30) to -the December 2007 SVN trunk versions of ElementTree (1.3) and +The timings cited below compare lxml 2.0 final (with libxml2 2.6.31) +to the January 2008 SVN trunk versions of ElementTree (1.3) and cElementTree (1.2.7). They were run single-threaded on a 1.8GHz Intel Core Duo machine under Ubuntu Linux 7.10 (Gutsy). The C libraries were compiled with the same platform specific optimisation flags. 
The @@ -117,23 +117,23 @@ 1.2, lxml is still more than 5 times as fast as the much improved ElementTree 1.3:: - lxe: tostring_utf16 (SATR T1) 23.4821 msec/pass + lxe: tostring_utf16 (SATR T1) 19.0921 msec/pass cET: tostring_utf16 (SATR T1) 129.8430 msec/pass ET : tostring_utf16 (SATR T1) 136.1301 msec/pass - lxe: tostring_utf16 (UATR T1) 23.4859 msec/pass + lxe: tostring_utf16 (UATR T1) 20.4630 msec/pass cET: tostring_utf16 (UATR T1) 130.1570 msec/pass ET : tostring_utf16 (UATR T1) 136.3101 msec/pass - lxe: tostring_utf16 (S-TR T2) 24.2729 msec/pass + lxe: tostring_utf16 (S-TR T2) 18.8632 msec/pass cET: tostring_utf16 (S-TR T2) 136.9388 msec/pass ET : tostring_utf16 (S-TR T2) 143.9550 msec/pass - lxe: tostring_utf8 (S-TR T2) 18.4860 msec/pass + lxe: tostring_utf8 (S-TR T2) 14.4310 msec/pass cET: tostring_utf8 (S-TR T2) 137.0859 msec/pass ET : tostring_utf8 (S-TR T2) 144.3110 msec/pass - lxe: tostring_utf8 (U-TR T3) 2.7399 msec/pass + lxe: tostring_utf8 (U-TR T3) 2.6381 msec/pass cET: tostring_utf8 (U-TR T3) 52.1040 msec/pass ET : tostring_utf8 (U-TR T3) 53.1070 msec/pass @@ -205,10 +205,10 @@ (given in seconds):: lxe: -- S- U- -A SA UA - T1: 0.0914 0.0875 0.0872 0.0892 0.0882 0.0900 - T2: 0.0894 0.0897 0.0892 0.0988 0.0978 0.0974 - T3: 0.0219 0.0194 0.0189 0.0570 0.0570 0.0573 - T4: 0.0004 0.0003 0.0003 0.0012 0.0012 0.0012 + T1: 0.0783 0.0777 0.0774 0.0787 0.0781 0.0783 + T2: 0.0799 0.0796 0.0799 0.0879 0.0882 0.0886 + T3: 0.0245 0.0216 0.0217 0.0577 0.0575 0.0572 + T4: 0.0003 0.0003 0.0003 0.0011 0.0011 0.0011 cET: -- S- U- -A SA UA T1: 0.0272 0.0264 0.0267 0.0268 0.0261 0.0265 T2: 0.0280 0.0274 0.0273 0.0273 0.0276 0.0275 @@ -235,21 +235,21 @@ create a shallow copy of their list of children, lxml has to create a Python object for each child and collect them in a list:: - lxe: root_list_children (--TR T1) 0.0169 msec/pass + lxe: root_list_children (--TR T1) 0.0160 msec/pass cET: root_list_children (--TR T1) 0.0081 msec/pass ET : root_list_children (--TR T1) 
0.0541 msec/pass - lxe: root_list_children (--TR T2) 0.2339 msec/pass + lxe: root_list_children (--TR T2) 0.2100 msec/pass cET: root_list_children (--TR T2) 0.0319 msec/pass ET : root_list_children (--TR T2) 0.4420 msec/pass This handicap is also visible when accessing single children:: - lxe: first_child (--TR T2) 0.2470 msec/pass + lxe: first_child (--TR T2) 0.2429 msec/pass cET: first_child (--TR T2) 0.2170 msec/pass ET : first_child (--TR T2) 0.9968 msec/pass - lxe: last_child (--TR T1) 0.2482 msec/pass + lxe: last_child (--TR T1) 0.2470 msec/pass cET: last_child (--TR T1) 0.2291 msec/pass ET : last_child (--TR T1) 0.9830 msec/pass @@ -258,11 +258,11 @@ The data structure used by libxml2 is a linked tree, and thus, a linked list of children:: - lxe: middle_child (--TR T1) 0.2789 msec/pass + lxe: middle_child (--TR T1) 0.2759 msec/pass cET: middle_child (--TR T1) 0.2229 msec/pass ET : middle_child (--TR T1) 1.0030 msec/pass - lxe: middle_child (--TR T2) 1.9610 msec/pass + lxe: middle_child (--TR T2) 1.7071 msec/pass cET: middle_child (--TR T2) 0.2229 msec/pass ET : middle_child (--TR T2) 0.9930 msec/pass @@ -274,7 +274,7 @@ in. 
This results in a major performance difference for creating independent Elements that end up in independently created documents:: - lxe: create_elements (--TC T2) 3.1691 msec/pass + lxe: create_elements (--TC T2) 2.8961 msec/pass cET: create_elements (--TC T2) 0.1929 msec/pass ET : create_elements (--TC T2) 1.3590 msec/pass @@ -282,11 +282,11 @@ are supposed to end up in, either as SubElements of an Element or using the explicit ``Element.makeelement()`` call:: - lxe: makeelement (--TC T2) 2.2650 msec/pass + lxe: makeelement (--TC T2) 1.9000 msec/pass cET: makeelement (--TC T2) 0.3211 msec/pass ET : makeelement (--TC T2) 1.6358 msec/pass - lxe: create_subelements (--TC T2) 1.9531 msec/pass + lxe: create_subelements (--TC T2) 1.7891 msec/pass cET: create_subelements (--TC T2) 0.2351 msec/pass ET : create_subelements (--TC T2) 3.2270 msec/pass @@ -305,11 +305,11 @@ The following benchmark appends all root children of the second tree to the root of the first tree:: - lxe: append_from_document (--TR T1,T2) 3.8681 msec/pass + lxe: append_from_document (--TR T1,T2) 3.3841 msec/pass cET: append_from_document (--TR T1,T2) 0.2699 msec/pass ET : append_from_document (--TR T1,T2) 1.2650 msec/pass - lxe: append_from_document (--TR T3,T4) 0.0570 msec/pass + lxe: append_from_document (--TR T3,T4) 0.0441 msec/pass cET: append_from_document (--TR T3,T4) 0.0169 msec/pass ET : append_from_document (--TR T3,T4) 0.0820 msec/pass @@ -322,20 +322,20 @@ This difference is not always as visible, but applies to most parts of the API, like inserting newly created elements:: - lxe: insert_from_document (--TR T1,T2) 5.8019 msec/pass + lxe: insert_from_document (--TR T1,T2) 5.7020 msec/pass cET: insert_from_document (--TR T1,T2) 0.4041 msec/pass ET : insert_from_document (--TR T1,T2) 1.4789 msec/pass or replacing the child slice by a newly created element:: - lxe: replace_children_element (--TC T1) 0.2480 msec/pass + lxe: replace_children_element (--TC T1) 0.2210 msec/pass cET: 
replace_children_element (--TC T1) 0.0238 msec/pass ET : replace_children_element (--TC T1) 0.1600 msec/pass as opposed to replacing the slice with an existing element from the same document:: - lxe: replace_children (--TC T1) 0.0188 msec/pass + lxe: replace_children (--TC T1) 0.0179 msec/pass cET: replace_children (--TC T1) 0.0119 msec/pass ET : replace_children (--TC T1) 0.0739 msec/pass @@ -347,16 +347,16 @@ Deep copying a tree is fast in lxml:: - lxe: deepcopy_all (--TR T1) 10.9420 msec/pass + lxe: deepcopy_all (--TR T1) 9.7558 msec/pass cET: deepcopy_all (--TR T1) 120.6188 msec/pass ET : deepcopy_all (--TR T1) 902.6880 msec/pass - lxe: deepcopy_all (-ATR T2) 12.5830 msec/pass + lxe: deepcopy_all (-ATR T2) 12.3210 msec/pass cET: deepcopy_all (-ATR T2) 136.9810 msec/pass ET : deepcopy_all (-ATR T2) 944.2801 msec/pass - lxe: deepcopy_all (S-TR T3) 4.1170 msec/pass - cET: deepcopy_all (S-TR T3) 36.1221 msec/pass + lxe: deepcopy_all (S-TR T3) 8.3981 msec/pass + cET: deepcopy_all (S-TR T3) 35.6541 msec/pass ET : deepcopy_all (S-TR T3) 221.6041 msec/pass So, for example, if you have a database-like scenario where you parse in a @@ -372,37 +372,37 @@ especially if few elements are of interest or the target element tag name is known, lxml is a good choice:: - lxe: getiterator_all (--TR T1) 5.8582 msec/pass + lxe: getiterator_all (--TR T1) 5.7251 msec/pass cET: getiterator_all (--TR T1) 39.9489 msec/pass ET : getiterator_all (--TR T1) 23.0000 msec/pass - lxe: getiterator_islice (--TR T2) 0.0780 msec/pass + lxe: getiterator_islice (--TR T2) 0.0830 msec/pass cET: getiterator_islice (--TR T2) 0.3440 msec/pass ET : getiterator_islice (--TR T2) 0.2429 msec/pass - lxe: getiterator_tag (--TR T2) 0.3119 msec/pass + lxe: getiterator_tag (--TR T2) 0.3011 msec/pass cET: getiterator_tag (--TR T2) 14.1001 msec/pass ET : getiterator_tag (--TR T2) 7.4241 msec/pass - lxe: getiterator_tag_all (--TR T2) 0.6540 msec/pass + lxe: getiterator_tag_all (--TR T2) 0.6340 msec/pass cET: 
getiterator_tag_all (--TR T2) 40.7901 msec/pass ET : getiterator_tag_all (--TR T2) 21.0390 msec/pass This translates directly into similar timings for ``Element.findall()``:: - lxe: findall (--TR T2) 8.1239 msec/pass + lxe: findall (--TR T2) 7.8950 msec/pass cET: findall (--TR T2) 44.5340 msec/pass ET : findall (--TR T2) 27.1149 msec/pass - lxe: findall (--TR T3) 1.6870 msec/pass + lxe: findall (--TR T3) 1.7281 msec/pass cET: findall (--TR T3) 12.9611 msec/pass ET : findall (--TR T3) 8.6131 msec/pass - lxe: findall_tag (--TR T2) 0.7660 msec/pass + lxe: findall_tag (--TR T2) 0.7720 msec/pass cET: findall_tag (--TR T2) 40.6358 msec/pass ET : findall_tag (--TR T2) 21.4581 msec/pass - lxe: findall_tag (--TR T3) 0.2160 msec/pass + lxe: findall_tag (--TR T3) 0.2050 msec/pass cET: findall_tag (--TR T3) 9.6831 msec/pass ET : findall_tag (--TR T3) 5.2109 msec/pass @@ -420,38 +420,38 @@ of the lxml API you use. The most straight forward way is to call the ``xpath()`` method on an Element or ElementTree:: - lxe: xpath_method (--TC T1) 1.8251 msec/pass - lxe: xpath_method (--TC T2) 23.3159 msec/pass - lxe: xpath_method (--TC T3) 0.1378 msec/pass - lxe: xpath_method (--TC T4) 1.1270 msec/pass + lxe: xpath_method (--TC T1) 1.7459 msec/pass + lxe: xpath_method (--TC T2) 22.0850 msec/pass + lxe: xpath_method (--TC T3) 0.1309 msec/pass + lxe: xpath_method (--TC T4) 1.0772 msec/pass This is well suited for testing and when the XPath expressions are as diverse as the trees they are called on. 
However, if you have a single XPath expression that you want to apply to a larger number of different elements, the ``XPath`` class is the most efficient way to do it:: - lxe: xpath_class (--TC T1) 0.6981 msec/pass - lxe: xpath_class (--TC T2) 3.6111 msec/pass - lxe: xpath_class (--TC T3) 0.0591 msec/pass - lxe: xpath_class (--TC T4) 0.1979 msec/pass + lxe: xpath_class (--TC T1) 0.6740 msec/pass + lxe: xpath_class (--TC T2) 3.1760 msec/pass + lxe: xpath_class (--TC T3) 0.0548 msec/pass + lxe: xpath_class (--TC T4) 0.1700 msec/pass Note that this still allows you to use variables in the expression, so you can parse it once and then adapt it through variables at call time. In other cases, where you have a fixed Element or ElementTree and want to run different expressions on it, you should consider the ``XPathEvaluator``:: - lxe: xpath_element (--TR T1) 0.4342 msec/pass - lxe: xpath_element (--TR T2) 11.9958 msec/pass - lxe: xpath_element (--TR T3) 0.1690 msec/pass - lxe: xpath_element (--TR T4) 0.3510 msec/pass + lxe: xpath_element (--TR T1) 0.4151 msec/pass + lxe: xpath_element (--TR T2) 11.6129 msec/pass + lxe: xpath_element (--TR T3) 0.1299 msec/pass + lxe: xpath_element (--TR T4) 0.3409 msec/pass While it looks slightly slower, creating an XPath object for each of the expressions generates a much higher overhead here:: - lxe: xpath_class_repeat (--TC T1) 1.7619 msec/pass - lxe: xpath_class_repeat (--TC T2) 21.9102 msec/pass - lxe: xpath_class_repeat (--TC T3) 0.1330 msec/pass - lxe: xpath_class_repeat (--TC T4) 1.0631 msec/pass + lxe: xpath_class_repeat (--TC T1) 1.6699 msec/pass + lxe: xpath_class_repeat (--TC T2) 20.4420 msec/pass + lxe: xpath_class_repeat (--TC T3) 0.1230 msec/pass + lxe: xpath_class_repeat (--TC T4) 0.9859 msec/pass A longer example @@ -608,21 +608,21 @@ tree. 
It avoids step-by-step Python element instantiations along the path, which can substantially improve the access time:: - lxe: attribute (--TR T1) 9.8128 msec/pass - lxe: attribute (--TR T2) 53.2899 msec/pass - lxe: attribute (--TR T4) 9.6800 msec/pass - - lxe: objectpath (--TR T1) 5.4898 msec/pass - lxe: objectpath (--TR T2) 48.4819 msec/pass - lxe: objectpath (--TR T4) 5.3761 msec/pass - - lxe: attributes_deep (--TR T1) 56.3290 msec/pass - lxe: attributes_deep (--TR T2) 62.4361 msec/pass - lxe: attributes_deep (--TR T4) 15.8000 msec/pass - - lxe: objectpath_deep (--TR T1) 49.0060 msec/pass - lxe: objectpath_deep (--TR T2) 52.5169 msec/pass - lxe: objectpath_deep (--TR T4) 7.1371 msec/pass + lxe: attribute (--TR T1) 9.4581 msec/pass + lxe: attribute (--TR T2) 52.5560 msec/pass + lxe: attribute (--TR T4) 9.1729 msec/pass + + lxe: objectpath (--TR T1) 4.8690 msec/pass + lxe: objectpath (--TR T2) 47.8780 msec/pass + lxe: objectpath (--TR T4) 4.7870 msec/pass + + lxe: attributes_deep (--TR T1) 54.7471 msec/pass + lxe: attributes_deep (--TR T2) 62.7451 msec/pass + lxe: attributes_deep (--TR T4) 15.1050 msec/pass + + lxe: objectpath_deep (--TR T1) 48.2810 msec/pass + lxe: objectpath_deep (--TR T2) 51.3949 msec/pass + lxe: objectpath_deep (--TR T4) 6.1419 msec/pass Note, however, that parsing ObjectPath expressions is not for free either, so this is most effective for frequently accessing the same element. 
@@ -648,17 +648,17 @@ subtrees and elements) to cache, you can trade memory usage against access speed:: - lxe: attribute_cached (--TR T1) 7.6170 msec/pass - lxe: attribute_cached (--TR T2) 50.7941 msec/pass - lxe: attribute_cached (--TR T4) 7.4880 msec/pass - - lxe: attributes_deep_cached (--TR T1) 49.9220 msec/pass - lxe: attributes_deep_cached (--TR T2) 55.9340 msec/pass - lxe: attributes_deep_cached (--TR T4) 10.0131 msec/pass - - lxe: objectpath_deep_cached (--TR T1) 44.9121 msec/pass - lxe: objectpath_deep_cached (--TR T2) 48.2371 msec/pass - lxe: objectpath_deep_cached (--TR T4) 3.9630 msec/pass + lxe: attribute_cached (--TR T1) 7.5061 msec/pass + lxe: attribute_cached (--TR T2) 50.1881 msec/pass + lxe: attribute_cached (--TR T4) 7.4170 msec/pass + + lxe: attributes_deep_cached (--TR T1) 48.7239 msec/pass + lxe: attributes_deep_cached (--TR T2) 55.2199 msec/pass + lxe: attributes_deep_cached (--TR T4) 9.9740 msec/pass + + lxe: objectpath_deep_cached (--TR T1) 43.4160 msec/pass + lxe: objectpath_deep_cached (--TR T2) 47.6480 msec/pass + lxe: objectpath_deep_cached (--TR T4) 3.4680 msec/pass Things to note: you cannot currently use ``weakref.WeakKeyDictionary`` objects for this as lxml's element objects do not support weak references (which are From scoder at codespeak.net Sat Feb 9 18:37:36 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sat, 9 Feb 2008 18:37:36 +0100 (CET) Subject: [Lxml-checkins] r51362 - in lxml/trunk: . doc Message-ID: <20080209173736.5B9281684F8@codespeak.net> Author: scoder Date: Sat Feb 9 18:37:34 2008 New Revision: 51362 Modified: lxml/trunk/ (props changed) lxml/trunk/doc/performance.txt Log: r3459 at delle: sbehnel | 2008-02-09 18:37:06 +0100 small doc fix Modified: lxml/trunk/doc/performance.txt ============================================================================== --- lxml/trunk/doc/performance.txt (original) +++ lxml/trunk/doc/performance.txt Sat Feb 9 18:37:34 2008 @@ -72,7 +72,7 @@ mailing list. 
The timings cited below compare lxml 2.0 final (with libxml2 2.6.31) -to the January 2008 SVN trunk versions of ElementTree (1.3) and +to the January 2008 SVN trunk versions of ElementTree (1.3alpha) and cElementTree (1.2.7). They were run single-threaded on a 1.8GHz Intel Core Duo machine under Ubuntu Linux 7.10 (Gutsy). The C libraries were compiled with the same platform specific optimisation flags. The From scoder at codespeak.net Mon Feb 11 19:28:03 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Mon, 11 Feb 2008 19:28:03 +0100 (CET) Subject: [Lxml-checkins] r51394 - in lxml/trunk: . src/lxml/html src/lxml/html/tests Message-ID: <20080211182803.9A6021683FA@codespeak.net> Author: scoder Date: Mon Feb 11 19:28:01 2008 New Revision: 51394 Modified: lxml/trunk/ (props changed) lxml/trunk/src/lxml/html/diff.py lxml/trunk/src/lxml/html/tests/test_diff.txt Log: r3461 at delle: sbehnel | 2008-02-11 19:27:27 +0100 code simplification Modified: lxml/trunk/src/lxml/html/diff.py ============================================================================== --- lxml/trunk/src/lxml/html/diff.py (original) +++ lxml/trunk/src/lxml/html/diff.py Mon Feb 11 19:28:01 2008 @@ -738,29 +738,17 @@ If skip_outer is true, then don't serialize the outermost tag """ - - html_xsl = """\ - - - - - - -""" - transform = etree.XSLT(etree.XML(html_xsl)) assert not isinstance(el, basestring), ( "You should pass in an element, not a string like %r" % el) - html = str(transform(el)) + html = etree.tostring(el, method="html", encoding="UTF-8") if skip_outer: # Get rid of the extra starting tag: html = html[html.find('>')+1:] - if skip_outer: # Get rid of the extra end tag: html = html[:html.rfind('<')] - if skip_outer: return html.strip() else: - return html.lstrip() + return html def _fixup_ins_del_tags(doc): """fixup_ins_del_tags that works on an lxml document in-place Modified: lxml/trunk/src/lxml/html/tests/test_diff.txt 
============================================================================== --- lxml/trunk/src/lxml/html/tests/test_diff.txt (original) +++ lxml/trunk/src/lxml/html/tests/test_diff.txt Mon Feb 11 19:28:01 2008 @@ -204,10 +204,7 @@
Some text and

more text

>>> pfixup(''' ...
One tableMore stuff
''') - - - -
One tableMore stuff
+
One tableMore stuff
Testing split_unbalanced:: From ianb at codespeak.net Wed Feb 13 05:24:51 2008 From: ianb at codespeak.net (ianb at codespeak.net) Date: Wed, 13 Feb 2008 05:24:51 +0100 (CET) Subject: [Lxml-checkins] r51426 - lxml/trunk/src/lxml Message-ID: <20080213042451.7F1311683BE@codespeak.net> Author: ianb Date: Wed Feb 13 05:24:50 2008 New Revision: 51426 Modified: lxml/trunk/src/lxml/doctestcompare.py Log: add NOPARSE_MARKUP to __all__ Modified: lxml/trunk/src/lxml/doctestcompare.py ============================================================================== --- lxml/trunk/src/lxml/doctestcompare.py (original) +++ lxml/trunk/src/lxml/doctestcompare.py Wed Feb 13 05:24:50 2008 @@ -32,7 +32,7 @@ import doctest import cgi -__all__ = ['PARSE_HTML', 'PARSE_XML', 'LXMLOutputChecker', +__all__ = ['PARSE_HTML', 'PARSE_XML', 'NOPARSE_MARKUP', 'LXMLOutputChecker', 'LHTMLOutputChecker', 'install', 'temp_install'] PARSE_HTML = doctest.register_optionflag('PARSE_HTML') From ianb at codespeak.net Wed Feb 13 05:25:17 2008 From: ianb at codespeak.net (ianb at codespeak.net) Date: Wed, 13 Feb 2008 05:25:17 +0100 (CET) Subject: [Lxml-checkins] r51427 - in lxml/trunk: . src/lxml/html src/lxml/html/tests Message-ID: <20080213042517.926221683BE@codespeak.net> Author: ianb Date: Wed Feb 13 05:25:16 2008 New Revision: 51427 Modified: lxml/trunk/CHANGES.txt lxml/trunk/src/lxml/html/__init__.py lxml/trunk/src/lxml/html/tests/test_forms.txt Log: Fix case when inputs have no name Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Wed Feb 13 05:25:16 2008 @@ -40,6 +40,10 @@ Bugs fixed ---------- +* Form elements would return None for ``form.fields.keys()`` if there + was an unnamed input field. Now unnamed input fields are completely + ignored. 
+ Other changes ------------- Modified: lxml/trunk/src/lxml/html/__init__.py ============================================================================== --- lxml/trunk/src/lxml/html/__init__.py (original) +++ lxml/trunk/src/lxml/html/__init__.py Wed Feb 13 05:25:16 2008 @@ -818,7 +818,8 @@ def keys(self): names = sets.Set() for el in self: - names.add(el.name) + if el.name is not None: + names.add(el.name) return list(names) def __iter__(self): Modified: lxml/trunk/src/lxml/html/tests/test_forms.txt ============================================================================== --- lxml/trunk/src/lxml/html/tests/test_forms.txt (original) +++ lxml/trunk/src/lxml/html/tests/test_forms.txt Wed Feb 13 05:25:16 2008 @@ -141,3 +141,24 @@ single_checkbox2: 'good' check_group: +>>> import lxml.html +>>> tree = lxml.html.fromstring(''' +... +...
+... +... +...
+... +... ''') +>>> tree # doctest: +ELLIPSIS + +>>> tree.forms[0] # doctest: +ELLIPSIS + +>>> tree.forms[0].fields # doctest: +NOPARSE_MARKUP + +>>> tree.forms[0].fields.keys() +['foo'] +>>> tree.forms[0].fields.items() +[('foo', 'bar')] +>>> tree.forms[0].fields.values() +['bar'] From scoder at codespeak.net Wed Feb 13 21:48:40 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Wed, 13 Feb 2008 21:48:40 +0100 (CET) Subject: [Lxml-checkins] r51452 - in lxml/trunk: . src/lxml src/lxml/html Message-ID: <20080213204840.667B11683FE@codespeak.net> Author: scoder Date: Wed Feb 13 21:48:39 2008 New Revision: 51452 Modified: lxml/trunk/ (props changed) lxml/trunk/CHANGES.txt lxml/trunk/Makefile lxml/trunk/src/lxml/classlookup.pxi lxml/trunk/src/lxml/docloader.pxi lxml/trunk/src/lxml/dtd.pxi lxml/trunk/src/lxml/extensions.pxi lxml/trunk/src/lxml/html/usedoctest.py lxml/trunk/src/lxml/iterparse.pxi lxml/trunk/src/lxml/lxml.etree.pyx lxml/trunk/src/lxml/lxml.objectify.pyx lxml/trunk/src/lxml/nsclasses.pxi lxml/trunk/src/lxml/objectpath.pxi lxml/trunk/src/lxml/parser.pxi lxml/trunk/src/lxml/relaxng.pxi lxml/trunk/src/lxml/schematron.pxi lxml/trunk/src/lxml/usedoctest.py lxml/trunk/src/lxml/xinclude.pxi lxml/trunk/src/lxml/xmlerror.pxi lxml/trunk/src/lxml/xmlid.pxi lxml/trunk/src/lxml/xmlschema.pxi lxml/trunk/src/lxml/xpath.pxi lxml/trunk/src/lxml/xslt.pxi Log: r3463 at delle: sbehnel | 2008-02-13 00:07:06 +0100 huge docstring update to make signatures visible Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Wed Feb 13 21:48:39 2008 @@ -8,6 +8,9 @@ Features added -------------- +* Docstrings now reflect the signature of functions and methods to + make them visible in API docs and ``help()`` + Bugs fixed ---------- Modified: lxml/trunk/Makefile ============================================================================== --- 
lxml/trunk/Makefile (original) +++ lxml/trunk/Makefile Wed Feb 13 21:48:39 2008 @@ -42,7 +42,7 @@ rm -fr doc/html/api @[ -x "`which epydoc`" ] \ && (cd src && echo "Generating API docs ..." && \ - PYTHONPATH=. epydoc -v -o ../doc/html/api --name lxml --url http://codespeak.net/lxml/ lxml/) \ + PYTHONPATH=. epydoc -v --docformat "restructuredtext en" -o ../doc/html/api --name lxml --url http://codespeak.net/lxml/ lxml/) \ || (echo "not generating epydoc API documentation") # XXX What should the default be? Modified: lxml/trunk/src/lxml/classlookup.pxi ============================================================================== --- lxml/trunk/src/lxml/classlookup.pxi (original) +++ lxml/trunk/src/lxml/classlookup.pxi Wed Feb 13 21:48:39 2008 @@ -53,7 +53,9 @@ # class to store element class lookup functions cdef public class ElementClassLookup [ type LxmlElementClassLookupType, object LxmlElementClassLookup ]: - """Superclass of Element class lookups. + """ElementClassLookup(self) + + Superclass of Element class lookups. """ cdef _element_class_lookup_function _lookup_function def __init__(self): @@ -62,18 +64,20 @@ cdef public class FallbackElementClassLookup(ElementClassLookup) \ [ type LxmlFallbackElementClassLookupType, object LxmlFallbackElementClassLookup ]: - """Superclass of Element class lookups with additional fallback. + """FallbackElementClassLookup(self, fallback=None) + + Superclass of Element class lookups with additional fallback. """ cdef readonly ElementClassLookup fallback cdef _element_class_lookup_function _fallback_function def __init__(self, ElementClassLookup fallback=None): self._lookup_function = NULL # use default lookup if fallback is not None: - self.setFallback(fallback) + self._setFallback(fallback) else: self._fallback_function = _lookupDefaultElementClass - def setFallback(self, ElementClassLookup lookup not None): + cdef void _setFallback(self, ElementClassLookup lookup): """Sets the fallback scheme for this lookup method. 
""" self.fallback = lookup @@ -81,6 +85,20 @@ if self._fallback_function is NULL: self._fallback_function = _lookupDefaultElementClass + def set_fallback(self, ElementClassLookup lookup not None): + """set_fallback(self, lookup) + + Sets the fallback scheme for this lookup method. + """ + self._setFallback(lookup) + + def setFallback(self, ElementClassLookup lookup not None): + """Sets the fallback scheme for this lookup method. + + :deprecated: use ``set_fallback()`` instead. + """ + self._setFallback(lookup) + cdef object _callFallback(self, _Document doc, xmlNode* c_node): return self._fallback_function(self.fallback, doc, c_node) @@ -89,7 +107,8 @@ # Custom Element class lookup schemes cdef class ElementDefaultClassLookup(ElementClassLookup): - """Element class lookup scheme that always returns the default Element + """ElementDefaultClassLookup(self, element=None, comment=None, pi=None, entity=None) + Element class lookup scheme that always returns the default Element class. The keyword arguments ``element``, ``comment``, ``pi`` and ``entity`` @@ -163,13 +182,14 @@ assert 0, "Unknown node type: %s" % c_node.type cdef class AttributeBasedElementClassLookup(FallbackElementClassLookup): - """Checks an attribute of an Element and looks up the value in a class - dictionary. + """AttributeBasedElementClassLookup(self, attribute_name, class_mapping, fallback=None) + Checks an attribute of an Element and looks up the value in a + class dictionary. Arguments: - * attribute name - '{ns}name' style string - * class mapping - Python dict mapping attribute values to Element classes - * fallback - optional fallback lookup mechanism + - attribute name - '{ns}name' style string + - class mapping - Python dict mapping attribute values to Element classes + - fallback - optional fallback lookup mechanism A None key in the class mapping will be checked if the attribute is missing. 
@@ -207,7 +227,8 @@ cdef class ParserBasedElementClassLookup(FallbackElementClassLookup): - """Element class lookup based on the XML parser. + """ParserBasedElementClassLookup(self, fallback=None) + Element class lookup based on the XML parser. """ def __init__(self, ElementClassLookup fallback=None): FallbackElementClassLookup.__init__(self, fallback) @@ -221,7 +242,8 @@ cdef class CustomElementClassLookup(FallbackElementClassLookup): - """Element class lookup based on a subclass method. + """CustomElementClassLookup(self, fallback=None) + Element class lookup based on a subclass method. You can inherit from this class and override the method:: @@ -240,6 +262,7 @@ self._lookup_function = _custom_class_lookup def lookup(self, type, doc, namespace, name): + "lookup(self, type, doc, namespace, name)" return None cdef object _custom_class_lookup(state, _Document doc, xmlNode* c_node): @@ -291,11 +314,14 @@ LOOKUP_ELEMENT_CLASS = function def setElementClassLookup(ElementClassLookup lookup = None): - "@deprecated: use ``set_element_class_lookup(lookup)`` instead" + ":deprecated: use ``set_element_class_lookup(lookup)`` instead" set_element_class_lookup(lookup) def set_element_class_lookup(ElementClassLookup lookup = None): - "Set the global default element class lookup method." + """set_element_class_lookup(lookup = None) + + Set the global default element class lookup method. + """ if lookup is None or lookup._lookup_function is NULL: _setElementClassLookupFunction(NULL, None) else: Modified: lxml/trunk/src/lxml/docloader.pxi ============================================================================== --- lxml/trunk/src/lxml/docloader.pxi (original) +++ lxml/trunk/src/lxml/docloader.pxi Wed Feb 13 21:48:39 2008 @@ -15,7 +15,9 @@ cdef class Resolver: "This is the base class of all resolvers." 
def resolve(self, system_url, public_id, context): - """Override this method to resolve an external source by + """resolve(self, system_url, public_id, context) + + Override this method to resolve an external source by ``system_url`` and ``public_id``. The third argument is an opaque context object. @@ -24,7 +26,9 @@ return None def resolve_empty(self, context): - """Return an empty input document. + """resolve_empty(self, context) + + Return an empty input document. Pass context as parameter. """ @@ -34,7 +38,9 @@ return doc_ref def resolve_string(self, string, context, *, base_url=None): - """Return a parsable string as input document. + """resolve_string(self, string, context, base_url=None) + + Return a parsable string as input document. Pass data string and context as parameters. @@ -49,7 +55,9 @@ return doc_ref def resolve_filename(self, filename, context): - """Return the name of a parsable file as input document. + """resolve_filename(self, filename, context) + + Return the name of a parsable file as input document. Pass filename and context as parameters. """ @@ -60,7 +68,9 @@ return doc_ref def resolve_file(self, f, context): - """Return an open file-like object as input document. + """resolve_file(self, f, context) + + Return an open file-like object as input document. Pass open file and context as parameters. """ @@ -83,7 +93,9 @@ self._default_resolver = default_resolver def add(self, Resolver resolver not None): - """Register a resolver. + """add(self, resolver) + + Register a resolver. For each requested entity, the 'resolve' method of the resolver will be called and the result will be passed to the parser. 
If this method @@ -94,6 +106,7 @@ self._resolvers.add(resolver) def remove(self, resolver): + "remove(self, resolver)" self._resolvers.discard(resolver) cdef _ResolverRegistry _copy(self): @@ -103,9 +116,11 @@ return registry def copy(self): + "copy(self)" return self._copy() def resolve(self, system_url, public_id, context): + "resolve(self, system_url, public_id, context)" for resolver in self._resolvers: result = resolver.resolve(system_url, public_id, context) if result is not None: Modified: lxml/trunk/src/lxml/dtd.pxi ============================================================================== --- lxml/trunk/src/lxml/dtd.pxi (original) +++ lxml/trunk/src/lxml/dtd.pxi Wed Feb 13 21:48:39 2008 @@ -20,7 +20,8 @@ # DTD cdef class DTD(_Validator): - """A DTD validator. + """DTD(self, file=None, external_id=None) + A DTD validator. Can load from filesystem directly given a filename or file-like object. Alternatively, pass the keyword parameter ``external_id`` to load from a @@ -56,7 +57,9 @@ tree.xmlFreeDtd(self._c_dtd) def __call__(self, etree): - """Validate doc using the DTD. + """__call__(self, etree) + + Validate doc using the DTD. Returns true if the document is valid, false if not. """ Modified: lxml/trunk/src/lxml/extensions.pxi ============================================================================== --- lxml/trunk/src/lxml/extensions.pxi (original) +++ lxml/trunk/src/lxml/extensions.pxi Wed Feb 13 21:48:39 2008 @@ -326,7 +326,9 @@ self._temp_refs.add((<_Element>o)._doc) def Extension(module, function_mapping=None, *, ns=None): - """Build a dictionary of extension functions from the functions + """Extension(module, function_mapping=None, ns=None) + + Build a dictionary of extension functions from the functions defined in a module or the methods of an object. 
As second argument, you can pass an additional mapping of Modified: lxml/trunk/src/lxml/html/usedoctest.py ============================================================================== --- lxml/trunk/src/lxml/html/usedoctest.py (original) +++ lxml/trunk/src/lxml/html/usedoctest.py Wed Feb 13 21:48:39 2008 @@ -1,3 +1,13 @@ +"""Doctest module for HTML comparison. + +Usage:: + + >>> import lxml.html.usedoctest + >>> # now do your HTML doctests ... + +See `lxml.doctestcompare`. +""" + from lxml import doctestcompare doctestcompare.temp_install(html=True, del_module=__name__) Modified: lxml/trunk/src/lxml/iterparse.pxi ============================================================================== --- lxml/trunk/src/lxml/iterparse.pxi (original) +++ lxml/trunk/src/lxml/iterparse.pxi Wed Feb 13 21:48:39 2008 @@ -239,9 +239,12 @@ origEnd(ctxt, name) cdef class iterparse(_BaseParser): - """Incremental parser. Parses XML into a tree and generates tuples - (event, element) in a SAX-like fashion. ``event`` is any of 'start', - 'end', 'start-ns', 'end-ns'. + """iterparse(self, source, events=("end",), tag=None, attribute_defaults=False, dtd_validation=False, load_dtd=False, no_network=True, remove_blank_text=False, remove_comments=False, remove_pis=False, encoding=None, html=False, schema=None) + Incremental parser. + + Parses XML into a tree and generates tuples (event, element) in a + SAX-like fashion. ``event`` is any of 'start', 'end', 'start-ns', + 'end-ns'. For 'start' and 'end', ``element`` is the Element that the parser just found opening or closing. For 'start-ns', it is a tuple (prefix, URI) of @@ -262,17 +265,17 @@ attribute default values are requested. 
Available boolean keyword arguments: - * attribute_defaults - read default attributes from DTD - * dtd_validation - validate (if DTD is available) - * load_dtd - use DTD for parsing - * no_network - prevent network access for related files - * remove_blank_text - discard blank text nodes - * remove_comments - discard comments - * remove_pis - discard processing instructions + - attribute_defaults - read default attributes from DTD + - dtd_validation - validate (if DTD is available) + - load_dtd - use DTD for parsing + - no_network - prevent network access for related files + - remove_blank_text - discard blank text nodes + - remove_comments - discard comments + - remove_pis - discard processing instructions Other keyword arguments: - * encoding - override the document encoding - * schema - an XMLSchema to validate against + - encoding - override the document encoding + - schema - an XMLSchema to validate against """ cdef object _source cdef readonly object root @@ -397,8 +400,10 @@ cdef class iterwalk: - """A tree walker that generates events from an existing tree as if it was - parsing XML data with ``iterparse()``. + """iterwalk(self, element_or_tree, events=("end",), tag=None) + + A tree walker that generates events from an existing tree as if it + was parsing XML data with ``iterparse()``. """ cdef object _node_stack cdef object _pop_node Modified: lxml/trunk/src/lxml/lxml.etree.pyx ============================================================================== --- lxml/trunk/src/lxml/lxml.etree.pyx (original) +++ lxml/trunk/src/lxml/lxml.etree.pyx Wed Feb 13 21:48:39 2008 @@ -1,3 +1,9 @@ +"""The lxml.etree module implements the extended ElementTree API for +XML. +""" + +__docformat__ = "restructuredtext en" + cimport tree, python, config from tree cimport xmlDoc, xmlNode, xmlAttr, xmlNs, _isElement, _getNs from python cimport callable, _cstr, _isString @@ -214,7 +220,9 @@ cdef class QName: - """QName wrapper. + """QName(text_or_uri, tag=None) + + QName wrapper. 
Pass a tag name by itself or a namespace URI and a tag name to create a qualified name. The ``text`` property holds the @@ -510,7 +518,9 @@ cdef public class _Element [ type LxmlElementType, object LxmlElement ]: - """Element class. References a document object and a libxml node. + """Element class. + + References a document object and a libxml node. By pointing to a Document instance, a reference is kept to _Document as long as there is some pointer to a node in it. @@ -522,7 +532,9 @@ cdef object _attrib def _init(self): - """Called after object initialisation. Custom subclasses may override + """_init(self) + + Called after object initialisation. Custom subclasses may override this if they recursively call _init() in the superclasses. """ @@ -537,7 +549,9 @@ # MANIPULATORS def __setitem__(self, x, value): - """Replaces the given subelement index or slice. + """__setitem__(self, x, value) + + Replaces the given subelement index or slice. """ cdef xmlNode* c_node cdef xmlNode* c_next @@ -571,7 +585,9 @@ moveNodeToDocument(self._doc, c_node) def __delitem__(self, x): - """Deletes the given subelement or a slice. + """__delitem__(self, x) + + Deletes the given subelement or a slice. """ cdef xmlNode* c_node cdef xmlNode* c_next @@ -599,9 +615,11 @@ _removeNode(self._doc, c_node) def __deepcopy__(self, memo): + "__deepcopy__(self, memo)" return self.__copy__() def __copy__(self): + "__copy__(self)" cdef xmlDoc* c_doc cdef xmlNode* c_node cdef _Document new_doc @@ -619,17 +637,23 @@ return _elementFactory(new_doc, c_node) def set(self, key, value): - """Sets an element attribute. + """set(self, key, value) + + Sets an element attribute. """ _setAttributeValue(self, key, value) def append(self, _Element element not None): - """Adds a subelement to the end of this element. + """append(self, element) + + Adds a subelement to the end of this element. 
""" _appendChild(self, element) def addnext(self, _Element element): - """Adds the element as a following sibling directly after this + """addnext(self, element) + + Adds the element as a following sibling directly after this element. This is normally used to set a processing instruction or comment after @@ -644,7 +668,9 @@ _appendSibling(self, element) def addprevious(self, _Element element): - """Adds the element as a preceding sibling directly before this + """addprevious(self, element) + + Adds the element as a preceding sibling directly before this element. This is normally used to set a processing instruction or comment @@ -659,13 +685,17 @@ _prependSibling(self, element) def extend(self, elements): - """Extends the current children by the elements in the iterable. + """extend(self, elements) + + Extends the current children by the elements in the iterable. """ for element in elements: _appendChild(self, element) def clear(self): - """Resets an element. This function removes all subelements, clears + """clear(self) + + Resets an element. This function removes all subelements, clears all attributes and sets the text and tail properties to None. """ cdef xmlAttr* c_attr @@ -693,7 +723,9 @@ c_node = c_node_next def insert(self, index, _Element element not None): - """Inserts a subelement at the given position in this element + """insert(self, index, element) + + Inserts a subelement at the given position in this element """ cdef xmlNode* c_node cdef xmlNode* c_next @@ -707,7 +739,9 @@ moveNodeToDocument(self._doc, element._c_node) def remove(self, _Element element not None): - """Removes a matching subelement. Unlike the find methods, this + """remove(self, element) + + Removes a matching subelement. Unlike the find methods, this method compares elements based on identity, not on tag value or contents. 
""" @@ -724,7 +758,9 @@ def replace(self, _Element old_element not None, _Element new_element not None): - """Replaces a subelement with the element passed as second argument. + """replace(self, old_element, new_element) + + Replaces a subelement with the element passed as second argument. """ cdef xmlNode* c_old_node cdef xmlNode* c_old_next @@ -862,6 +898,7 @@ # ACCESSORS def __repr__(self): + "__repr__(self)" return "" % (self.tag, id(self)) def __getitem__(self, x): @@ -901,11 +938,14 @@ return _elementFactory(self._doc, c_node) def __len__(self): - """Returns the number of subelements. + """__len__(self) + + Returns the number of subelements. """ return _countElements(self._c_node.children) def __nonzero__(self): + "__nonzero__(self)" import warnings warnings.warn( "The behavior of this method will change in future versions. " @@ -916,6 +956,7 @@ return _hasChild(self._c_node) def __contains__(self, element): + "__contains__(self, element)" cdef xmlNode* c_node if not isinstance(element, _Element): return 0 @@ -923,13 +964,17 @@ return c_node is not NULL and c_node.parent is self._c_node def __iter__(self): + "__iter__(self)" return ElementChildIterator(self) def __reversed__(self): + "__reversed__(self)" return ElementChildIterator(self, reversed=True) def index(self, _Element child not None, start=None, stop=None): - """Find the position of the child within the parent. + """index(self, child, start=None, stop=None) + + Find the position of the child within the parent. This method is not part of the original ElementTree API. """ @@ -1012,40 +1057,52 @@ raise ValueError("list.index(x): x not in list") def get(self, key, default=None): - """Gets an element attribute. + """get(self, key, default=None) + + Gets an element attribute. """ return _getAttributeValue(self, key, default) def keys(self): - """Gets a list of attribute names. The names are returned in an + """keys(self) + + Gets a list of attribute names. 
The names are returned in an arbitrary order (just like for an ordinary Python dictionary). """ return _collectAttributes(self._c_node, 1) def values(self): - """Gets element attribute values as a sequence of strings. The + """values(self) + + Gets element attribute values as a sequence of strings. The attributes are returned in an arbitrary order. """ return _collectAttributes(self._c_node, 2) def items(self): - """Gets element attributes, as a sequence. The attributes are returned in + """items(self) + + Gets element attributes, as a sequence. The attributes are returned in an arbitrary order. """ return _collectAttributes(self._c_node, 3) def getchildren(self): - """Returns all direct children. The elements are returned in document + """getchildren(self) + + Returns all direct children. The elements are returned in document order. - @deprecated: Note that this method has been deprecated as of - ElementTree 1.3 and lxml 2.0. New code should use - ``list(element)`` or simply iterate over elements. + :deprecated: Note that this method has been deprecated as of + ElementTree 1.3 and lxml 2.0. New code should use + ``list(element)`` or simply iterate over elements. """ return _collectChildren(self) def getparent(self): - """Returns the parent of this element or None for the root element. + """getparent(self) + + Returns the parent of this element or None for the root element. """ cdef xmlNode* c_node c_node = _parentElement(self._c_node) @@ -1055,7 +1112,9 @@ return _elementFactory(self._doc, c_node) def getnext(self): - """Returns the following sibling of this element or None. + """getnext(self) + + Returns the following sibling of this element or None. """ cdef xmlNode* c_node c_node = _nextElement(self._c_node) @@ -1064,7 +1123,9 @@ return None def getprevious(self): - """Returns the preceding sibling of this element or None. + """getprevious(self) + + Returns the preceding sibling of this element or None. 
""" cdef xmlNode* c_node c_node = _previousElement(self._c_node) @@ -1073,7 +1134,9 @@ return None def itersiblings(self, tag=None, *, preceding=False): - """Iterate over the following or preceding siblings of this element. + """itersiblings(self, tag=None, preceding=False) + + Iterate over the following or preceding siblings of this element. The direction is determined by the 'preceding' keyword which defaults to False, i.e. forward iteration over the following siblings. The @@ -1083,7 +1146,9 @@ return SiblingsIterator(self, tag, preceding=preceding) def iterancestors(self, tag=None): - """Iterate over the ancestors of this element (from parent to parent). + """iterancestors(self, tag=None) + + Iterate over the ancestors of this element (from parent to parent). The generated elements can be restricted to a specific tag name with the 'tag' keyword. @@ -1091,7 +1156,9 @@ return AncestorsIterator(self, tag) def iterdescendants(self, tag=None): - """Iterate over the descendants of this element in document order. + """iterdescendants(self, tag=None) + + Iterate over the descendants of this element in document order. As opposed to ``el.iter()``, this iterator does not yield the element itself. The generated elements can be restricted to a specific tag @@ -1100,7 +1167,9 @@ return ElementDepthFirstIterator(self, tag, inclusive=False) def iterchildren(self, tag=None, *, reversed=False): - """Iterate over the children of this element. + """iterchildren(self, tag=None, reversed=False) + + Iterate over the children of this element. As opposed to using normal iteration on this element, the generated elements can be restricted to a specific tag name with the 'tag' @@ -1109,7 +1178,9 @@ return ElementChildIterator(self, tag, reversed=reversed) def getroottree(self): - """Return an ElementTree for the root node of the document that + """getroottree(self) + + Return an ElementTree for the root node of the document that contains this element. 
This is the same as following element.getparent() up the tree until it @@ -1118,7 +1189,9 @@ return _elementTreeFactory(self._doc, None) def getiterator(self, tag=None): - """Returns a sequence or iterator of all elements in the subtree in + """getiterator(self, tag=None) + + Returns a sequence or iterator of all elements in the subtree in document order (depth first pre-order), starting with this element. @@ -1128,18 +1201,20 @@ You can also pass the Element, Comment, ProcessingInstruction and Entity factory functions to look only for the specific element type. - @deprecated: Note that this method is deprecated as of - ElementTree 1.3 and lxml 2.0. It returns an iterator in lxml, - which diverges from the original ElementTree behaviour. If - you want an efficient iterator, use the ``element.iter()`` - method instead. You should only use this method in new code - if you require backwards compatibility with older versions of - lxml or ElementTree. + :deprecated: Note that this method is deprecated as of + ElementTree 1.3 and lxml 2.0. It returns an iterator in + lxml, which diverges from the original ElementTree + behaviour. If you want an efficient iterator, use the + ``element.iter()`` method instead. You should only use this + method in new code if you require backwards compatibility + with older versions of lxml or ElementTree. """ return ElementDepthFirstIterator(self, tag) def iter(self, tag=None): - """Iterate over all elements in the subtree in document order (depth + """iter(self, tag=None) + + Iterate over all elements in the subtree in document order (depth first pre-order), starting with this element. Can be restricted to find only elements with a specific tag @@ -1151,7 +1226,9 @@ return ElementDepthFirstIterator(self, tag) def itertext(self, tag=None, *, with_tail=True): - """Iterates over the text content of a subtree. + """itertext(self, tag=None, with_tail=True) + + Iterates over the text content of a subtree. 
You can pass the ``tag`` keyword argument to restrict text content to a specific tag name. @@ -1162,41 +1239,53 @@ return ElementTextIterator(self, tag, with_tail=with_tail) def makeelement(self, _tag, attrib=None, nsmap=None, **_extra): - """Creates a new element associated with the same document. + """makeelement(self, _tag, attrib=None, nsmap=None, **_extra) + + Creates a new element associated with the same document. """ return _makeElement(_tag, NULL, self._doc, None, None, None, attrib, nsmap, _extra) def find(self, path): - """Finds the first matching subelement, by tag name or path. + """find(self, path) + + Finds the first matching subelement, by tag name or path. """ if isinstance(path, QName): path = (path).text return _elementpath.find(self, path) def findtext(self, path, default=None): - """Finds text for the first matching subelement, by tag name or path. + """findtext(self, path, default=None) + + Finds text for the first matching subelement, by tag name or path. """ if isinstance(path, QName): path = (path).text return _elementpath.findtext(self, path, default) def findall(self, path): - """Finds all matching subelements, by tag name or path. + """findall(self, path) + + Finds all matching subelements, by tag name or path. """ if isinstance(path, QName): path = (path).text return _elementpath.findall(self, path) def iterfind(self, path): - """Iterates over all matching subelements, by tag name or path. + """iterfind(self, path) + + Iterates over all matching subelements, by tag name or path. """ if isinstance(path, QName): path = (path).text return _elementpath.iterfind(self, path) def xpath(self, _path, *, namespaces=None, extensions=None, **_variables): - """Evaluate an xpath expression using the element as context node. + """xpath(self, _path, namespaces=None, extensions=None, **_variables) + + Evaluate an xpath expression using the element as context node. 
""" evaluator = XPathElementEvaluator(self, namespaces=namespaces, extensions=extensions) @@ -1260,15 +1349,19 @@ raise TypeError("this element does not have children or attributes") def set(self, key, value): + "set(self, key, value)" self._raiseImmutable() def append(self, value): + "append(self, value)" self._raiseImmutable() def insert(self, index, value): + "insert(self, index, value)" self._raiseImmutable() def __setitem__(self, index, value): + "__setitem__(self, index, value)" self._raiseImmutable() property attrib: @@ -1294,24 +1387,30 @@ # ACCESSORS def __getitem__(self, x): + "__getitem__(self, x)" if python.PySlice_Check(x): return [] else: raise IndexError("list index out of range") def __len__(self): + "__len__(self)" return 0 def get(self, key, default=None): + "get(self, key, default=None)" return None def keys(self): + "keys(self)" return [] def items(self): + "items(self)" return [] def values(self): + "values(self)" return [] cdef class _Comment(__ContentOnlyElement): @@ -1390,7 +1489,9 @@ "ElementTree not initialized, missing root" def parse(self, source, _BaseParser parser=None): - """Updates self with the content of source and returns its root + """parse(self, source, parser=None) + + Updates self with the content of source and returns its root """ cdef _Document doc doc = _parseDocument(source, parser) @@ -1402,7 +1503,9 @@ return self._context_node def _setroot(self, _Element root not None): - """Relocate the ElementTree to a new root node. + """_setroot(self, root) + + Relocate the ElementTree to a new root node. """ if root._c_node.type != tree.XML_ELEMENT_NODE: raise TypeError("Only elements can be the root of an ElementTree") @@ -1410,7 +1513,9 @@ self._doc = None def getroot(self): - """Gets the root element for this tree. + """getroot(self) + + Gets the root element for this tree. 
""" return self._context_node @@ -1445,7 +1550,10 @@ def write(self, file, *, encoding=None, method="xml", pretty_print=False, xml_declaration=None, with_tail=True): - """Write the tree to a file or file-like object. + """write(self, file, encoding=None, method="xml", + pretty_print=False, xml_declaration=None, with_tail=True) + + Write the tree to a file or file-like object. Defaults to ASCII encoding and writing a declaration as needed. @@ -1470,7 +1578,9 @@ write_declaration, 1, pretty_print, with_tail) def getpath(self, _Element element not None): - """Returns a structural, absolute XPath expression to find that element. + """getpath(self, element) + + Returns a structural, absolute XPath expression to find that element. """ cdef _Document doc cdef xmlDoc* c_doc @@ -1488,7 +1598,9 @@ return path def getiterator(self, tag=None): - """Returns a sequence or iterator of all elements in document order + """getiterator(self, tag=None) + + Returns a sequence or iterator of all elements in document order (depth first pre-order), starting with the root element. Can be restricted to find only elements with a specific tag @@ -1498,13 +1610,13 @@ You can also pass the Element, Comment, ProcessingInstruction and Entity factory functions to look only for the specific element type. - @deprecated: Note that this method is deprecated as of - ElementTree 1.3 and lxml 2.0. It returns an iterator in lxml, - which diverges from the original ElementTree behaviour. If - you want an efficient iterator, use the ``tree.iter()`` method - instead. You should only use this method in new code if you - require backwards compatibility with older versions of lxml or - ElementTree. + :deprecated: Note that this method is deprecated as of + ElementTree 1.3 and lxml 2.0. It returns an iterator in + lxml, which diverges from the original ElementTree + behaviour. If you want an efficient iterator, use the + ``tree.iter()`` method instead. 
You should only use this + method in new code if you require backwards compatibility + with older versions of lxml or ElementTree. """ root = self.getroot() if root is None: @@ -1512,7 +1624,9 @@ return root.getiterator(tag) def iter(self, tag=None): - """Creates an iterator for the root element. The iterator loops over + """iter(self, tag=None) + + Creates an iterator for the root element. The iterator loops over all elements in this tree, in document order. """ root = self.getroot() @@ -1521,7 +1635,9 @@ return root.iter(tag) def find(self, path): - """Finds the first toplevel element with given tag. Same as + """find(self, path) + + Finds the first toplevel element with given tag. Same as ``tree.getroot().find(path)``. """ self._assertHasRoot() @@ -1531,7 +1647,9 @@ return root.find(path) def findtext(self, path, default=None): - """Finds the text for the first element matching the ElementPath + """findtext(self, path, default=None) + + Finds the text for the first element matching the ElementPath expression. Same as getroot().findtext(path) """ self._assertHasRoot() @@ -1541,7 +1659,9 @@ return root.findtext(path, default) def findall(self, path): - """Finds all elements matching the ElementPath expression. Same as + """findall(self, path) + + Finds all elements matching the ElementPath expression. Same as getroot().findall(path). """ self._assertHasRoot() @@ -1551,7 +1671,9 @@ return root.findall(path) def iterfind(self, path): - """Iterates over all elements matching the ElementPath expression. + """iterfind(self, path) + + Iterates over all elements matching the ElementPath expression. Same as getroot().finditer(path). """ self._assertHasRoot() @@ -1561,7 +1683,9 @@ return root.iterfind(path) def xpath(self, _path, *, namespaces=None, extensions=None, **_variables): - """XPath evaluate in context of document. + """xpath(self, _path, namespaces=None, extensions=None, **_variables) + + XPath evaluate in context of document. 
``namespaces`` is an optional dictionary with prefix to namespace URI mappings, used by XPath. ``extensions`` defines additional extension @@ -1582,7 +1706,9 @@ return evaluator.evaluate(_path, **_variables) def xslt(self, _xslt, extensions=None, access_control=None, **_kw): - """Transform this document using other document. + """xslt(self, _xslt, extensions=None, access_control=None, **_kw) + + Transform this document using other document. xslt is a tree that should be XSLT keyword parameters are XSLT transformation parameters. @@ -1599,7 +1725,9 @@ return style(self, **_kw) def relaxng(self, relaxng): - """Validate this document using other document. + """relaxng(self, relaxng) + + Validate this document using other document. The relaxng argument is a tree that should contain a Relax NG schema. @@ -1615,7 +1743,9 @@ return schema.validate(self) def xmlschema(self, xmlschema): - """Validate this document using other document. + """xmlschema(self, xmlschema) + + Validate this document using other document. The xmlschema argument is a tree that should contain an XML Schema. @@ -1631,7 +1761,9 @@ return schema.validate(self) def xinclude(self): - """Process the XInclude nodes in this document and include the + """xinclude(self) + + Process the XInclude nodes in this document and include the referenced XML fragments. There is support for loading files through the file system, HTTP and @@ -1645,7 +1777,9 @@ XInclude()(self._context_node) def write_c14n(self, file): - """C14N write of document. Always writes UTF-8. + """write_c14n(self, file) + + C14N write of document. Always writes UTF-8. """ self._assertHasRoot() _tofilelikeC14N(file, self._context_node) @@ -1666,9 +1800,7 @@ cdef class _Attrib: - """A proxy for the ``Element.attrib`` property. - - Behaves as a normal Python dict. + """A dict-like proxy for the ``Element.attrib`` property. 
""" cdef _Element _element def __init__(self, _Element element not None): @@ -1895,7 +2027,9 @@ return current_node cdef class ElementChildIterator(_ElementIterator): - "Iterates over the children of an element." + """ElementChildIterator(self, node, tag=None, reversed=False) + Iterates over the children of an element. + """ def __init__(self, _Element node not None, tag=None, *, reversed=False): cdef xmlNode* c_node self._initTagMatch(tag) @@ -1916,7 +2050,8 @@ self._node = _elementFactory(node._doc, c_node) cdef class SiblingsIterator(_ElementIterator): - """Iterates over the siblings of an element. + """SiblingsIterator(self, node, tag=None, preceding=False) + Iterates over the siblings of an element. You can pass the boolean keyword ``preceding`` to specify the direction. """ @@ -1929,18 +2064,24 @@ self._storeNext(node) cdef class AncestorsIterator(_ElementIterator): - "Iterates over the ancestors of an element (from parent to parent)." + """AncestorsIterator(self, node, tag=None) + Iterates over the ancestors of an element (from parent to parent). + """ def __init__(self, _Element node not None, tag=None): self._initTagMatch(tag) self._next_element = _parentElement self._storeNext(node) cdef class ElementDepthFirstIterator(_ElementTagMatcher): - """Iterates over an element and its sub-elements in document order (depth - first pre-order). Note that this also includes comments, entities and - processing instructions. To filter them out, check if the ``tag`` - property of the returned element is a string (i.e. not None and not a - factory function), or pass the ``Element`` factory for the ``tag`` keyword. + """ElementDepthFirstIterator(self, node, tag=None, inclusive=True) + Iterates over an element and its sub-elements in document order (depth + first pre-order). + + Note that this also includes comments, entities and processing + instructions. To filter them out, check if the ``tag`` property + of the returned element is a string (i.e. 
not None and not a + factory function), or pass the ``Element`` factory for the ``tag`` + keyword. If the optional ``tag`` argument is not None, the iterator returns only the elements that match the respective name and namespace. @@ -2003,7 +2144,8 @@ return NULL cdef class ElementTextIterator: - """Iterates over the text content of a subtree. + """ElementTextIterator(self, element, tag=None, with_tail=True) + Iterates over the text content of a subtree. You can pass the ``tag`` keyword argument to restrict text content to a specific tag name. @@ -2057,7 +2199,9 @@ # module-level API for ElementTree def Element(_tag, attrib=None, nsmap=None, **_extra): - """Element factory. This function returns an object implementing the + """Element(_tag, attrib=None, nsmap=None, **_extra) + + Element factory. This function returns an object implementing the Element interface. """ ### also look at _Element.makeelement() and _BaseParser.makeelement() ### @@ -2065,7 +2209,9 @@ attrib, nsmap, _extra) def Comment(text=None): - """Comment element factory. This factory function creates a special element that will + """Comment(text=None) + + Comment element factory. This factory function creates a special element that will be serialized as an XML comment. """ cdef _Document doc @@ -2082,7 +2228,9 @@ return _elementFactory(doc, c_node) def ProcessingInstruction(target, text=None): - """ProcessingInstruction element factory. This factory function creates a + """ProcessingInstruction(target, text=None) + + ProcessingInstruction element factory. This factory function creates a special element that will be serialized as an XML processing instruction. """ cdef _Document doc @@ -2102,7 +2250,9 @@ PI = ProcessingInstruction def Entity(name): - """Entity factory. This factory function creates a special element + """Entity(name) + + Entity factory. This factory function creates a special element that will be serialized as an XML entity reference or character reference. 
Note, however, that entities will not be automatically declared in the document. A document that uses entity references @@ -2127,13 +2277,17 @@ def SubElement(_Element _parent not None, _tag, attrib=None, nsmap=None, **_extra): - """Subelement factory. This function creates an element instance, and + """SubElement(_parent, _tag, attrib=None, nsmap=None, **_extra) + + Subelement factory. This function creates an element instance, and appends it to an existing element. """ return _makeSubElement(_parent, _tag, None, None, attrib, nsmap, _extra) def ElementTree(_Element element=None, *, file=None, _BaseParser parser=None): - """ElementTree wrapper class. + """ElementTree(element=None, file=None, parser=None) + + ElementTree wrapper class. """ cdef xmlNode* c_next cdef xmlNode* c_node @@ -2156,7 +2310,9 @@ return _elementTreeFactory(doc, element) def HTML(text, _BaseParser parser=None, *, base_url=None): - """Parses an HTML document from a string constant. This function can be used + """HTML(text, parser=None, base_url=None) + + Parses an HTML document from a string constant. This function can be used to embed "HTML literals" in Python code. To override the parser with a different ``HTMLParser`` you can pass it to @@ -2178,7 +2334,9 @@ return result_container.result def XML(text, _BaseParser parser=None, *, base_url=None): - """Parses an XML document from a string constant. This function can be used + """XML(text, parser=None, base_url=None) + + Parses an XML document from a string constant. This function can be used to embed "XML literals" in Python code, like in >>> root = etree.XML("") @@ -2202,7 +2360,9 @@ return result_container.result def fromstring(text, _BaseParser parser=None, *, base_url=None): - """Parses an XML document from a string. + """fromstring(text, parser=None, base_url=None) + + Parses an XML document from a string. To override the default parser with a different parser you can pass it to the ``parser`` keyword argument. 
@@ -2219,7 +2379,9 @@ return result_container.result def fromstringlist(strings, _BaseParser parser=None): - """Parses an XML document from a sequence of strings. + """fromstringlist(strings, parser=None) + + Parses an XML document from a sequence of strings. To override the default parser with a different parser you can pass it to the ``parser`` keyword argument. @@ -2233,19 +2395,26 @@ return parser.close() def iselement(element): - """Checks if an object appears to be a valid element object. + """iselement(element) + + Checks if an object appears to be a valid element object. """ return isinstance(element, _Element) def dump(_Element elem not None, *, pretty_print=True, with_tail=True): - """Writes an element tree or element structure to sys.stdout. This function + """dump(elem, pretty_print=True, with_tail=True) + + Writes an element tree or element structure to sys.stdout. This function should be used for debugging only. """ _dumpToFile(sys.stdout, elem._c_node, pretty_print, with_tail) def tostring(element_or_tree, *, encoding=None, method="xml", xml_declaration=None, pretty_print=False, with_tail=True): - """Serialize an element to an encoded string representation of its XML + """tostring(element_or_tree, encoding=None, method="xml", + xml_declaration=None, pretty_print=False, with_tail=True) + + Serialize an element to an encoded string representation of its XML tree. Defaults to ASCII encoding without XML declaration. This behaviour can be @@ -2292,7 +2461,9 @@ type(element_or_tree)) def tostringlist(element_or_tree, *args, **kwargs): - """Serialize an element to an encoded string representation of its XML + """tostringlist(element_or_tree, *args, **kwargs) + + Serialize an element to an encoded string representation of its XML tree, stored in a list of partial strings. This is purely for ElementTree 1.3 compatibility. 
The result is a @@ -2302,7 +2473,10 @@ def tounicode(element_or_tree, *, method="xml", pretty_print=False, with_tail=True): - """Serialize an element to the Python unicode representation of its XML + """tounicode(element_or_tree, method="xml", pretty_print=False, + with_tail=True) + + Serialize an element to the Python unicode representation of its XML tree. Note that the result does not carry an XML encoding declaration and is @@ -2318,7 +2492,7 @@ by passing the boolean ``with_tail`` option. This has no impact on the tail text of children, which will always be serialised. - @deprecated: use ``tostring(el, encoding=unicode)`` instead. + :deprecated: use ``tostring(el, encoding=unicode)`` instead. """ if isinstance(element_or_tree, _Element): return _tounicode(<_Element>element_or_tree, method, 0, pretty_print, @@ -2331,7 +2505,9 @@ type(element_or_tree)) def parse(source, _BaseParser parser=None): - """Return an ElementTree object loaded with source elements. If no parser + """parse(source, parser=None) + + Return an ElementTree object loaded with source elements. If no parser is provided as second argument, the default parser is used. """ cdef _Document doc @@ -2366,8 +2542,10 @@ # Validation class DocumentInvalid(LxmlError): - """Validation error. Raised by all document validators when their - ``assertValid(tree)`` method fails. + """Validation error. + + Raised by all document validators when their ``assertValid(tree)`` + method fails. """ pass @@ -2375,28 +2553,39 @@ "Base class for XML validators." cdef _ErrorLog _error_log def __init__(self): + "__init__(self)" self._error_log = _ErrorLog() def validate(self, etree): - """Validate the document using this schema. + """validate(self, etree) + + Validate the document using this schema. - Returns true if document is valid, false if not.""" + Returns true if document is valid, false if not. 
+ """ return self(etree) def assertValid(self, etree): - "Raises DocumentInvalid if the document does not comply with the schema." + """assertValid(self, etree) + + Raises `DocumentInvalid` if the document does not comply with the schema. + """ if not self(etree): raise DocumentInvalid(self._error_log._buildExceptionMessage( "Document does not comply with schema"), self._error_log) def assert_(self, etree): - "Raises AssertionError if the document does not comply with the schema." + """assert_(self, etree) + + Raises `AssertionError` if the document does not comply with the schema. + """ if not self(etree): raise AssertionError(self._error_log._buildExceptionMessage( "Document does not comply with schema")) property error_log: + "The log of validation errors and warnings." def __get__(self): return self._error_log.copy() Modified: lxml/trunk/src/lxml/lxml.objectify.pyx ============================================================================== --- lxml/trunk/src/lxml/lxml.objectify.pyx (original) +++ lxml/trunk/src/lxml/lxml.objectify.pyx Wed Feb 13 21:48:39 2008 @@ -153,7 +153,9 @@ return _countSiblings(self._c_node) def countchildren(self): - """Return the number of children of this element, regardless of their + """countchildren(self) + + Return the number of children of this element, regardless of their name. """ # copied from etree @@ -168,7 +170,9 @@ return c def getchildren(self): - """Returns a sequence of all direct children. The elements are + """getchildren(self) + + Returns a sequence of all direct children. The elements are returned in document order. """ cdef tree.xmlNode* c_node @@ -217,7 +221,9 @@ self.remove(child) def addattr(self, tag, value): - """Add a child value to the element. + """addattr(self, tag, value) + + Add a child value to the element. As opposed to append(), it sets a data value, not an element. 
""" @@ -331,18 +337,21 @@ parent.remove(sibling) def iterfind(self, path): + "iterfind(self, path)" # Reimplementation of Element.iterfind() to make it work without child # iteration. xpath = etree.ETXPath(path) return iter(xpath(self)) def findall(self, path): + "findall(self, path)" # Reimplementation of Element.findall() to make it work without child # iteration. xpath = etree.ETXPath(path) return xpath(self) def find(self, path): + "find(self, path)" # Reimplementation of Element.find() to make it work without child # iteration. result = self.findall(path) @@ -354,6 +363,7 @@ return None def findtext(self, path, default=None): + "findtext(self, path, default=None)" # Reimplementation of Element.findtext() to make it work without child # iteration. result = self.find(path) @@ -363,7 +373,9 @@ return default def descendantpaths(self, prefix=None): - """Returns a list of object path expressions for all descendants. + """descendantpaths(self, prefix=None) + + Returns a list of object path expressions for all descendants. """ if prefix is not None and not python._isString(prefix): prefix = '.'.join(prefix) @@ -853,7 +865,8 @@ # Python type registry cdef class PyType: - """User defined type. + """PyType(self, name, type_check, type_class, stringify=None) + User defined type. Named type that contains a type check function and a type class that inherits from ObjectifiedDataElement. The type check must take a string @@ -862,6 +875,7 @@ guessing. Example:: + PyType('int', int, MyIntClass).register() Note that the order in which types are registered matters. The first @@ -894,7 +908,9 @@ return "PyType(%s, %s)" % (self.name, self._type.__name__) def register(self, before=None, after=None): - """Register the type. + """register(self, before=None, after=None) + + Register the type. 
The additional keyword arguments 'before' and 'after' accept a sequence of type names that must appear before/after the new type in @@ -933,6 +949,7 @@ _SCHEMA_TYPE_DICT[xs_type] = self def unregister(self): + "unregister(self)" if _PYTYPE_DICT.get(self.name) is self: del _PYTYPE_DICT[self.name] for xs_type, pytype in _SCHEMA_TYPE_DICT.items(): @@ -989,7 +1006,9 @@ return _typename(obj) def pytypename(obj): - """Find the name of the corresponding PyType for a Python object. + """pytypename(obj) + + Find the name of the corresponding PyType for a Python object. """ return _pytypename(obj) @@ -1035,7 +1054,9 @@ _registerPyTypes() def getRegisteredTypes(): - """Returns a list of the currently registered PyType objects. + """getRegisteredTypes() + + Returns a list of the currently registered PyType objects. To add a new type, retrieve this list and call unregister() for all entries. Then add the new type at a suitable position (possibly replacing @@ -1099,6 +1120,8 @@ cdef _ObjectifyElementMakerCaller NEW_ELEMENT_MAKER "PY_NEW" (object t) cdef class ElementMaker: + """ElementMaker(self, namespace=None, nsmap=None, annotate=True, makeelement=None) + """ cdef object _makeelement cdef object _namespace cdef object _nsmap @@ -1137,6 +1160,7 @@ cdef bint _annotate def __call__(self, *children, **attrib): + "__call__(self, *children, **attrib)" cdef _ObjectifyElementMakerCaller elementMaker cdef python.PyObject* pytype cdef _Element element @@ -1214,14 +1238,18 @@ __RECURSIVE_STR = 0 # default: off def enableRecursiveStr(on=True): - """Enable a recursively generated tree representation for str(element), + """enableRecursiveStr(on=True) + + Enable a recursively generated tree representation for str(element), based on objectify.dump(element). """ global __RECURSIVE_STR __RECURSIVE_STR = on def dump(_Element element not None): - """Return a recursively generated string representation of an element. 
+ """dump(_Element element not None) + + Return a recursively generated string representation of an element. """ return _dump(element, 0) @@ -1268,6 +1296,7 @@ copy_reg.pickle(ObjectifiedElement, reduceFunction, fromstring) def pickleReduce(obj): + "pickleReduce(obj)" return (fromstring, (etree.tostring(obj),)) _setupPickle(pickleReduce) @@ -1277,7 +1306,8 @@ # Element class lookup cdef class ObjectifyElementClassLookup(ElementClassLookup): - """Element class lookup method that uses the objectify classes. + """ObjectifyElementClassLookup(self, tree_class=None, empty_data_class=None) + Element class lookup method that uses the objectify classes. """ cdef object empty_data_class cdef object tree_class @@ -1363,7 +1393,9 @@ def pyannotate(element_or_tree, *, ignore_old=False, ignore_xsi=False, empty_pytype=None): - """Recursively annotates the elements of an XML tree with 'pytype' + """pyannotate(element_or_tree, ignore_old=False, ignore_xsi=False, empty_pytype=None) + + Recursively annotates the elements of an XML tree with 'pytype' attributes. If the 'ignore_old' keyword argument is True (the default), current 'pytype' @@ -1384,7 +1416,9 @@ def xsiannotate(element_or_tree, *, ignore_old=False, ignore_pytype=False, empty_type=None): - """Recursively annotates the elements of an XML tree with 'xsi:type' + """xsiannotate(element_or_tree, ignore_old=False, ignore_pytype=False, empty_type=None) + + Recursively annotates the elements of an XML tree with 'xsi:type' attributes. 
If the 'ignore_old' keyword argument is True (the default), current @@ -1411,7 +1445,9 @@ def annotate(element_or_tree, *, ignore_old=True, ignore_xsi=False, empty_pytype=None, empty_type=None, annotate_xsi=0, annotate_pytype=1): - """Recursively annotates the elements of an XML tree with 'xsi:type' + """annotate(element_or_tree, ignore_old=True, ignore_xsi=False, empty_pytype=None, empty_type=None, annotate_xsi=0, annotate_pytype=1) + + Recursively annotates the elements of an XML tree with 'xsi:type' and/or 'py:pytype' attributes. If the 'ignore_old' keyword argument is True (the default), current @@ -1597,7 +1633,9 @@ tree.END_FOR_EACH_ELEMENT_FROM(c_node) def deannotate(element_or_tree, *, pytype=True, xsi=True): - """Recursively de-annotate the elements of an XML tree by removing 'pytype' + """deannotate(element_or_tree, pytype=True, xsi=True) + + Recursively de-annotate the elements of an XML tree by removing 'pytype' and/or 'type' attributes. If the 'pytype' keyword argument is True (the default), 'pytype' attributes @@ -1642,11 +1680,13 @@ objectify_parser = __DEFAULT_PARSER def setDefaultParser(new_parser = None): - "This function is deprecated, use ``set_default_parser()`` instead." + ":deprecated: use ``set_default_parser()`` instead." set_default_parser(new_parser) def set_default_parser(new_parser = None): - """Replace the default parser used by objectify's Element() and + """set_default_parser(new_parser = None) + + Replace the default parser used by objectify's Element() and fromstring() functions. The new parser must be an etree.XMLParser. @@ -1662,7 +1702,9 @@ raise TypeError("parser must inherit from lxml.etree.XMLParser") def makeparser(**kw): - """Create a new XML parser for objectify trees. + """makeparser(remove_blank_text=True, **kw) + + Create a new XML parser for objectify trees. You can pass all keyword arguments that are supported by ``etree.XMLParser()``. 
Note that this parser defaults to removing @@ -1685,7 +1727,9 @@ _fromstring = etree.fromstring def fromstring(xml, parser=None): - """Objectify specific version of the lxml.etree fromstring() function + """fromstring(xml, parser=None) + + Objectify specific version of the lxml.etree fromstring() function that uses the objectify parser. You can pass a different parser as second argument. @@ -1700,7 +1744,9 @@ _parse = etree.parse def parse(f, parser=None): - """Parse a file or file-like object with the objectify parser. + """parse(f, parser=None) + + Parse a file or file-like object with the objectify parser. You can pass a different parser as second argument. """ @@ -1716,7 +1762,9 @@ E = ElementMaker() def Element(_tag, attrib=None, nsmap=None, *, _pytype=None, **_attributes): - """Objectify specific version of the lxml.etree Element() factory that + """Element(_tag, attrib=None, nsmap=None, _pytype=None, **_attributes) + + Objectify specific version of the lxml.etree Element() factory that always creates a structural (tree) element. NOTE: requires parser based element class lookup activated in lxml.etree! @@ -1734,7 +1782,9 @@ def DataElement(_value, attrib=None, nsmap=None, *, _pytype=None, _xsi=None, **_attributes): - """Create a new element from a Python value and XML attributes taken from + """DataElement(_value, attrib=None, nsmap=None, _pytype=None, _xsi=None, **_attributes) + + Create a new element from a Python value and XML attributes taken from keyword arguments or a dictionary passed as second argument. Automatically adds a 'pytype' attribute for the Python type of the value, Modified: lxml/trunk/src/lxml/nsclasses.pxi ============================================================================== --- lxml/trunk/src/lxml/nsclasses.pxi (original) +++ lxml/trunk/src/lxml/nsclasses.pxi Wed Feb 13 21:48:39 2008 @@ -28,10 +28,14 @@ self._entries = {} def update(self, class_dict_iterable): - """Forgivingly update the registry. 
If registered values do not match - the required type for this registry, or if their name starts with '_', - they will be silently discarded. This allows registrations at the - module or class level using vars(), globals() etc.""" + """update(self, class_dict_iterable) + + Forgivingly update the registry. + + If registered values do not match the required type for this + registry, or if their name starts with '_', they will be + silently discarded. This allows registrations at the module or + class level using vars(), globals() etc.""" if hasattr(class_dict_iterable, 'items'): class_dict_iterable = class_dict_iterable.items() for name, item in class_dict_iterable: @@ -89,7 +93,9 @@ cdef class ElementNamespaceClassLookup(FallbackElementClassLookup): - """Element class lookup scheme that searches the Element class in the + """ElementNamespaceClassLookup(self, fallback=None) + + Element class lookup scheme that searches the Element class in the Namespace registry. """ cdef object _namespace_registries @@ -99,8 +105,11 @@ self._lookup_function = _find_nselement_class def get_namespace(self, ns_uri): - """Retrieve the namespace object associated with the given URI. Creates a - new one if it does not yet exist.""" + """get_namespace(self, ns_uri) + + Retrieve the namespace object associated with the given URI. + + Creates a new one if it does not yet exist.""" if ns_uri: ns_utf = _utf8(ns_uri) else: @@ -156,9 +165,13 @@ __FUNCTION_NAMESPACE_REGISTRIES = {} def FunctionNamespace(ns_uri): - """Retrieve the function namespace object associated with the given - URI. Creates a new one if it does not yet exist. A function namespace can - only be used to register extension functions.""" + """FunctionNamespace(ns_uri) + + Retrieve the function namespace object associated with the given + URI. + + Creates a new one if it does not yet exist. 
A function namespace + can only be used to register extension functions.""" if ns_uri: ns_utf = _utf8(ns_uri) else: Modified: lxml/trunk/src/lxml/objectpath.pxi ============================================================================== --- lxml/trunk/src/lxml/objectpath.pxi (original) +++ lxml/trunk/src/lxml/objectpath.pxi Wed Feb 13 21:48:39 2008 @@ -8,7 +8,8 @@ cdef class ObjectPath: - """Immutable object that represents a compiled object path. + """ObjectPath(path) + Immutable object that represents a compiled object path. Example for a path: 'root.child[1].{other}child[25]' """ @@ -54,6 +55,7 @@ default, use_default) def hasattr(self, _Element root not None): + "hasattr(self, root)" try: _findObjectPath(root, self._c_path, self._path_len, None, 0) except AttributeError: @@ -61,14 +63,18 @@ return True def setattr(self, _Element root not None, value): - """Set the value of the target element in a subtree. + """setattr(self, root, value) + + Set the value of the target element in a subtree. If any of the children on the path does not exist, it is created. """ _createObjectPath(root, self._c_path, self._path_len, 1, value) def addattr(self, _Element root not None, value): - """Append a value to the target element in a subtree. + """addattr(self, root, value) + + Append a value to the target element in a subtree. If any of the children on the path does not exist, it is created. """ Modified: lxml/trunk/src/lxml/parser.pxi ============================================================================== --- lxml/trunk/src/lxml/parser.pxi (original) +++ lxml/trunk/src/lxml/parser.pxi Wed Feb 13 21:48:39 2008 @@ -668,6 +668,7 @@ return context._error_log.copy() property resolvers: + "The custom resolver registry of this parser." def __get__(self): return self._resolvers @@ -681,7 +682,9 @@ self.set_element_class_lookup(lookup) def set_element_class_lookup(self, ElementClassLookup lookup = None): - """Set a lookup scheme for element classes generated from this parser. 
+ """set_element_class_lookup(self, lookup = None) + + Set a lookup scheme for element classes generated from this parser. Reset it by passing None or nothing. """ @@ -702,11 +705,16 @@ return parser def copy(self): - "Create a new parser with the same configuration." + """copy(self) + + Create a new parser with the same configuration. + """ return self._copy() def makeelement(self, _tag, attrib=None, nsmap=None, **_extra): - """Creates a new element associated with this parser. + """makeelement(self, _tag, attrib=None, nsmap=None, **_extra) + + Creates a new element associated with this parser. """ return _makeElement(_tag, NULL, None, self, None, None, attrib, nsmap, _extra) @@ -861,7 +869,9 @@ return context._error_log.copy() def feed(self, data): - """Feeds data to the parser. The argument should be an 8-bit string + """feed(self, data) + + Feeds data to the parser. The argument should be an 8-bit string buffer containing encoded data, although Unicode is supported as long as both string types are not mixed. @@ -942,7 +952,9 @@ context.cleanup() def close(self): - """Terminates feeding data to this parser. This tells the parser to + """close(self) + + Terminates feeding data to this parser. This tells the parser to process any remaining data in the feed buffer, and then returns the root Element of the tree that was parsed. @@ -1303,7 +1315,8 @@ ############################################################ cdef class TreeBuilder(_SaxParserTarget): - """Parser target that builds a tree. + """TreeBuilder(self, element_factory=None, parser=None) + Parser target that builds a tree. The final tree is returned by the ``close()`` method. """ @@ -1343,7 +1356,9 @@ # Python level event handlers def close(self): - """Flushes the builder buffers, and returns the toplevel document + """close(self) + + Flushes the builder buffers, and returns the toplevel document element. 
""" assert python.PyList_GET_SIZE(self._element_stack) == 0, "missing end tags" @@ -1351,19 +1366,27 @@ return self._last def data(self, data): - """Adds text to the current element. The value should be either an + """data(self, data) + + Adds text to the current element. The value should be either an 8-bit string containing ASCII text, or a Unicode string. """ self._handleSaxData(data) def start(self, tag, attrs, nsmap=None): - "Opens a new element." + """start(self, tag, attrs, nsmap=None) + + Opens a new element. + """ if nsmap is None: nsmap = EMPTY_READ_ONLY_DICT return self._handleSaxStart(tag, attrs, nsmap) def end(self, tag): - "Closes the current element." + """end(self, tag) + + Closes the current element. + """ element = self._handleSaxEnd(tag) assert self._last.tag == tag,\ "end tag mismatch (expected %s, got %s)" % ( @@ -1371,9 +1394,13 @@ return element def pi(self, target, data): + """pi(self, target, data) + """ return self._handleSaxPi(target, data) def comment(self, comment): + """comment(self, comment) + """ return self._handleSaxComment(comment) # internal SAX event handlers @@ -1432,33 +1459,36 @@ ) cdef class XMLParser(_FeedParser): - """The XML parser. Parsers can be supplied as additional argument to - various parse functions of the lxml API. A default parser is always - available and can be replaced by a call to the global function - 'set_default_parser'. New parsers can be created at any time without a - major run-time overhead. + """XMLParser(self, attribute_defaults=False, dtd_validation=False, load_dtd=False, no_network=True, ns_clean=False, recover=False, remove_blank_text=False, compact=True, resolve_entities=True, remove_comments=False, remove_pis=False, target=None, encoding=None, schema=None) + The XML parser. + + Parsers can be supplied as additional argument to various parse + functions of the lxml API. A default parser is always available + and can be replaced by a call to the global function + 'set_default_parser'. 
New parsers can be created at any time + without a major run-time overhead. The keyword arguments in the constructor are mainly based on the libxml2 parser configuration. A DTD will also be loaded if validation or attribute default values are requested. Available boolean keyword arguments: - * attribute_defaults - read default attributes from DTD - * dtd_validation - validate (if DTD is available) - * load_dtd - use DTD for parsing - * no_network - prevent network access for related files (default: True) - * ns_clean - clean up redundant namespace declarations - * recover - try hard to parse through broken XML - * remove_blank_text - discard blank text nodes - * remove_comments - discard comments - * remove_pis - discard processing instructions - * compact - safe memory for short text content (default: True) - * resolve_entities - replace entities by their text value (default: True) + - attribute_defaults - read default attributes from DTD + - dtd_validation - validate (if DTD is available) + - load_dtd - use DTD for parsing + - no_network - prevent network access for related files (default: True) + - ns_clean - clean up redundant namespace declarations + - recover - try hard to parse through broken XML + - remove_blank_text - discard blank text nodes + - remove_comments - discard comments + - remove_pis - discard processing instructions + - compact - safe memory for short text content (default: True) + - resolve_entities - replace entities by their text value (default: True) Other keyword arguments: - * encoding - override the document encoding - * target - a parser target object that will receive the parse events - * schema - an XMLSchema to validate against + - encoding - override the document encoding + - target - a parser target object that will receive the parse events + - schema - an XMLSchema to validate against Note that you should avoid sharing parsers between threads. While this is not harmful, it is more efficient to use separate parsers. 
This does not @@ -1498,8 +1528,10 @@ target, None, encoding) cdef class ETCompatXMLParser(XMLParser): - """An XML parser with an ElementTree compatible default setup. See the - XMLParser class for details. + """ETCompatXMLParser(self, attribute_defaults=False, dtd_validation=False, load_dtd=False, no_network=True, ns_clean=False, recover=False, remove_blank_text=False, compact=True, resolve_entities=True, remove_comments=True, remove_pis=True, target=None, encoding=None, schema=None) + An XML parser with an ElementTree compatible default setup. + + See the XMLParser class for details. This parser has ``remove_comments`` and ``remove_pis`` enabled by default and thus ignores comments and processing instructions. @@ -1532,15 +1564,17 @@ __GLOBAL_PARSER_CONTEXT.setDefaultParser(__DEFAULT_XML_PARSER) def setDefaultParser(parser=None): - "@deprecated: please use set_default_parser instead." + ":deprecated: please use set_default_parser instead." set_default_parser(parser) def getDefaultParser(): - "@deprecated: please use get_default_parser instead." + ":deprecated: please use get_default_parser instead." return get_default_parser() def set_default_parser(_BaseParser parser=None): - """Set a default parser for the current thread. This parser is used + """set_default_parser(parser=None) + + Set a default parser for the current thread. This parser is used globally whenever no parser is supplied to the various parse functions of the lxml API. If this function is called without a parser (or if it is None), the default parser is reset to the original configuration. @@ -1554,6 +1588,7 @@ __GLOBAL_PARSER_CONTEXT.setDefaultParser(parser) def get_default_parser(): + "get_default_parser()" return __GLOBAL_PARSER_CONTEXT.getDefaultParser() ############################################################ @@ -1568,22 +1603,26 @@ ) cdef class HTMLParser(_FeedParser): - """The HTML parser. This parser allows reading HTML into a normal XML - tree. 
By default, it can read broken (non well-formed) HTML, depending on - the capabilities of libxml2. Use the 'recover' option to switch this off. + """HTMLParser(self, recover=True, no_network=True, remove_blank_text=False, compact=True, remove_comments=False, remove_pis=False, target=None, encoding=None, schema=None) + The HTML parser. + + This parser allows reading HTML into a normal XML tree. By + default, it can read broken (non well-formed) HTML, depending on + the capabilities of libxml2. Use the 'recover' option to switch + this off. Available boolean keyword arguments: - * recover - try hard to parse through broken HTML (default: True) - * no_network - prevent network access for related files (default: True) - * remove_blank_text - discard empty text nodes - * remove_comments - discard comments - * remove_pis - discard processing instructions - * compact - safe memory for short text content (default: True) + - recover - try hard to parse through broken HTML (default: True) + - no_network - prevent network access for related files (default: True) + - remove_blank_text - discard empty text nodes + - remove_comments - discard comments + - remove_pis - discard processing instructions + - compact - safe memory for short text content (default: True) Other keyword arguments: - * encoding - override the document encoding - * target - a parser target object that will receive the parse events - * schema - an XMLSchema to validate against + - encoding - override the document encoding + - target - a parser target object that will receive the parse events + - schema - an XMLSchema to validate against Note that you should avoid sharing parsers between threads for performance reasons. 
Modified: lxml/trunk/src/lxml/relaxng.pxi ============================================================================== --- lxml/trunk/src/lxml/relaxng.pxi (original) +++ lxml/trunk/src/lxml/relaxng.pxi Wed Feb 13 21:48:39 2008 @@ -20,7 +20,8 @@ # RelaxNG cdef class RelaxNG(_Validator): - """Turn a document into a Relax NG validator. + """RelaxNG(self, etree=None, file=None) + Turn a document into a Relax NG validator. Either pass a schema as Element or ElementTree, or pass a file or filename through the ``file`` keyword argument. @@ -91,7 +92,9 @@ relaxng.xmlRelaxNGFree(self._c_schema) def __call__(self, etree): - """Validate doc using Relax NG. + """__call__(self, etree) + + Validate doc using Relax NG. Returns true if document is valid, false if not.""" cdef _Document doc Modified: lxml/trunk/src/lxml/schematron.pxi ============================================================================== --- lxml/trunk/src/lxml/schematron.pxi (original) +++ lxml/trunk/src/lxml/schematron.pxi Wed Feb 13 21:48:39 2008 @@ -66,14 +66,15 @@ # Schematron cdef class Schematron(_Validator): - """A Schematron validator. + """Schematron(self, etree=None, file=None) + A Schematron validator. Pass a root Element or an ElementTree to turn it into a validator. Alternatively, pass a filename as keyword argument 'file' to parse from the file system. """ cdef schematron.xmlSchematron* _c_schema - def __init__(self, etree=None, file=None): + def __init__(self, etree=None, *, file=None): cdef _Document doc cdef _Element root_node cdef xmlNode* c_node @@ -120,7 +121,9 @@ schematron.xmlSchematronFree(self._c_schema) def __call__(self, etree): - """Validate doc using Schematron. + """__call__(self, etree) + + Validate doc using Schematron. 
Returns true if document is valid, false if not.""" cdef _Document doc Modified: lxml/trunk/src/lxml/usedoctest.py ============================================================================== --- lxml/trunk/src/lxml/usedoctest.py (original) +++ lxml/trunk/src/lxml/usedoctest.py Wed Feb 13 21:48:39 2008 @@ -1,3 +1,13 @@ +"""Doctest module for XML comparison. + +Usage:: + + >>> import lxml.usedoctest + >>> # now do your XML doctests ... + +See `lxml.doctestcompare` +""" + from lxml import doctestcompare doctestcompare.temp_install(del_module=__name__) Modified: lxml/trunk/src/lxml/xinclude.pxi ============================================================================== --- lxml/trunk/src/lxml/xinclude.pxi (original) +++ lxml/trunk/src/lxml/xinclude.pxi Wed Feb 13 21:48:39 2008 @@ -8,7 +8,8 @@ pass cdef class XInclude: - """XInclude processor. + """XInclude(self) + XInclude processor. Create an instance and call it on an Element to run XInclude processing. @@ -22,6 +23,7 @@ return self._error_log.copy() def __call__(self, _Element node not None): + "__call__(self, node)" # We cannot pass the XML_PARSE_NOXINCNODE option as this would free # the XInclude nodes - there may still be Python references to them! # Therefore, we allow XInclude nodes to be converted to Modified: lxml/trunk/src/lxml/xmlerror.pxi ============================================================================== --- lxml/trunk/src/lxml/xmlerror.pxi (original) +++ lxml/trunk/src/lxml/xmlerror.pxi Wed Feb 13 21:48:39 2008 @@ -5,7 +5,9 @@ # module level API functions def clear_error_log(): - """Clear the global error log. Note that this log is already bound to a + """clear_error_log() + + Clear the global error log. Note that this log is already bound to a fixed size. """ __GLOBAL_ERROR_LOG.clear() @@ -14,7 +16,7 @@ """Clear the global error log. Note that this log is already bound to a fixed size. - @deprecated: use ``clear_error_log()`` instead. + :deprecated: use ``clear_error_log()`` instead. 
""" __GLOBAL_ERROR_LOG.clear() @@ -233,8 +235,10 @@ return _ListErrorLog(filtered, None, None) def filter_types(self, types): - """Filter the errors by the given types and return a new error log - containing the matches. + """filter_types(self, types) + + Filter the errors by the given types and return a new error + log containing the matches. """ cdef _LogEntry entry if not python.PySequence_Check(types): @@ -246,8 +250,10 @@ return _ListErrorLog(filtered, None, None) def filter_levels(self, levels): - """Filter the errors by the given error levels and return a new error - log containing the matches. + """filter_levels(self, levels) + + Filter the errors by the given error levels and return a new + error log containing the matches. """ cdef _LogEntry entry if not python.PySequence_Check(levels): @@ -259,7 +265,10 @@ return _ListErrorLog(filtered, None, None) def filter_from_level(self, level): - "Return a log with all messages of the requested level of worse." + """filter_from_level(self, level) + + Return a log with all messages of the requested level of worse. + """ cdef _LogEntry entry filtered = [] for entry in self._entries: @@ -268,15 +277,24 @@ return _ListErrorLog(filtered, None, None) def filter_from_fatals(self): - "Convenience method to get all fatal error messages." + """filter_from_fatals(self) + + Convenience method to get all fatal error messages. + """ return self.filter_from_level(ErrorLevels.FATAL) def filter_from_errors(self): - "Convenience method to get all error messages or worse." + """filter_from_errors(self) + + Convenience method to get all error messages or worse. + """ return self.filter_from_level(ErrorLevels.ERROR) def filter_from_warnings(self): - "Convenience method to get all warnings or worse." + """filter_from_warnings(self) + + Convenience method to get all warnings or worse. 
+ """ return self.filter_from_level(ErrorLevels.WARNING) cdef class _ErrorLog(_ListErrorLog): @@ -331,7 +349,8 @@ python.PyList_Append(entries, entry) cdef class PyErrorLog(_BaseErrorLog): - """A global error log that connects to the Python stdlib logging package. + """PyErrorLog(self, logger_name=None) + A global error log that connects to the Python stdlib logging package. The constructor accepts an optional logger name. @@ -395,12 +414,14 @@ Note that this disables access to the global error log from exceptions. Parsers, XSLT etc. will continue to provide their normal local error log. - @deprecated: use ``use_global_python_log()`` instead. + :deprecated: use ``use_global_python_log()`` instead. """ use_global_python_log(log) def use_global_python_log(PyErrorLog log not None): - """Replace the global error log by an etree.PyErrorLog that uses the + """use_global_python_log(log) + + Replace the global error log by an etree.PyErrorLog that uses the standard Python logging package. Note that this disables access to the global error log from exceptions. Modified: lxml/trunk/src/lxml/xmlid.pxi ============================================================================== --- lxml/trunk/src/lxml/xmlid.pxi (original) +++ lxml/trunk/src/lxml/xmlid.pxi Wed Feb 13 21:48:39 2008 @@ -1,7 +1,9 @@ cdef object _find_id_attributes def XMLID(text): - """Parse the text and return a tuple (root node, ID dictionary). The root + """XMLID(text) + + Parse the text and return a tuple (root node, ID dictionary). The root node is the same as returned by the XML() function. The dictionary contains string-element pairs. The dictionary keys are the values of 'id' attributes. The elements referenced by the ID are stored as dictionary @@ -19,7 +21,9 @@ return (root, dic) def XMLDTDID(text): - """Parse the text and return a tuple (root node, ID dictionary). The root + """XMLDTDID(text) + + Parse the text and return a tuple (root node, ID dictionary). 
The root node is the same as returned by the XML() function. The dictionary contains string-element pairs. The dictionary keys are the values of ID attributes as defined by the DTD. The elements referenced by the ID are @@ -37,7 +41,9 @@ return (root, _IDDict(root)) def parseid(source, parser=None): - """Parses the source into a tuple containing an ElementTree object and an + """parseid(source, parser=None) + + Parses the source into a tuple containing an ElementTree object and an ID dictionary. If no parser is provided as second argument, the default parser is used. @@ -49,7 +55,8 @@ return (_elementTreeFactory(doc, None), _IDDict(doc)) cdef class _IDDict: - """A dictionary-like proxy class that mapps ID attributes to elements. + """IDDict(self, etree) + A dictionary-like proxy class that mapps ID attributes to elements. The dictionary must be instantiated with the root element of a parsed XML document, otherwise the behaviour is undefined. Elements and XML trees Modified: lxml/trunk/src/lxml/xmlschema.pxi ============================================================================== --- lxml/trunk/src/lxml/xmlschema.pxi (original) +++ lxml/trunk/src/lxml/xmlschema.pxi Wed Feb 13 21:48:39 2008 @@ -20,7 +20,8 @@ # XMLSchema cdef class XMLSchema(_Validator): - """Turn a document into an XML Schema validator. + """XMLSchema(self, etree=None, file=None) + Turn a document into an XML Schema validator. Either pass a schema as Element or ElementTree, or pass a file or filename through the ``file`` keyword argument. @@ -83,7 +84,9 @@ xmlschema.xmlSchemaFree(self._c_schema) def __call__(self, etree): - """Validate doc using XML Schema. + """__call__(self, etree) + + Validate doc using XML Schema. Returns true if document is valid, false if not. 
""" Modified: lxml/trunk/src/lxml/xpath.pxi ============================================================================== --- lxml/trunk/src/lxml/xpath.pxi (original) +++ lxml/trunk/src/lxml/xpath.pxi Wed Feb 13 21:48:39 2008 @@ -127,13 +127,17 @@ self._context.set_context(xpathCtxt) def evaluate(self, _eval_arg, **_variables): - """Evaluate an XPath expression. + """evaluate(self, _eval_arg, **_variables) + + Evaluate an XPath expression. Instead of calling this method, you can also call the evaluator object itself. Variables may be provided as keyword arguments. Note that namespaces are currently not supported for variables. + + :deprecated: call the object, not its method. """ return self(_eval_arg, **_variables) @@ -207,7 +211,8 @@ cdef class XPathElementEvaluator(_XPathEvaluatorBase): - """Create an XPath evaluator for an element. + """XPathElementEvaluator(self, element, namespaces=None, extensions=None, regexp=True) + Create an XPath evaluator for an element. Absolute XPath expressions (starting with '/') will be evaluated against the ElementTree as returned by getroottree(). @@ -232,17 +237,34 @@ def registerNamespace(self, prefix, uri): """Register a namespace with the XPath context. + + :deprecated: use ``register_namespace()`` instead + """ + self._context.addNamespace(prefix, uri) + + def register_namespace(self, prefix, uri): + """Register a namespace with the XPath context. """ self._context.addNamespace(prefix, uri) def registerNamespaces(self, namespaces): """Register a prefix -> uri dict. + + :deprecated: use ``register_namespaces()`` instead + """ + for prefix, uri in namespaces.items(): + self._context.addNamespace(prefix, uri) + + def register_namespaces(self, namespaces): + """Register a prefix -> uri dict. """ for prefix, uri in namespaces.items(): self._context.addNamespace(prefix, uri) def __call__(self, _path, **_variables): - """Evaluate an XPath expression on the document. 
+ """__call__(self, _path, **_variables) + + Evaluate an XPath expression on the document. Variables may be provided as keyword arguments. Note that namespaces are currently not supported for variables. @@ -276,7 +298,8 @@ cdef class XPathDocumentEvaluator(XPathElementEvaluator): - """Create an XPath evaluator for an ElementTree. + """XPathDocumentEvaluator(self, etree, namespaces=None, extensions=None, regexp=True) + Create an XPath evaluator for an ElementTree. Additional namespace declarations can be passed with the 'namespace' keyword argument. EXSLT regular expression support can be disabled with @@ -289,7 +312,9 @@ extensions=extensions, regexp=regexp) def __call__(self, _path, **_variables): - """Evaluate an XPath expression on the document. + """__call__(self, _path, **_variables) + + Evaluate an XPath expression on the document. Variables may be provided as keyword arguments. Note that namespaces are currently not supported for variables. @@ -327,7 +352,9 @@ def XPathEvaluator(etree_or_element, *, namespaces=None, extensions=None, regexp=True): - """Creates an XPath evaluator for an ElementTree or an Element. + """XPathEvaluator(etree_or_element, namespaces=None, extensions=None, regexp=True) + + Creates an XPath evaluator for an ElementTree or an Element. The resulting object can be called with an XPath expression as argument and XPath variables provided as keyword arguments. @@ -347,8 +374,8 @@ cdef class XPath(_XPathEvaluatorBase): - """A compiled XPath expression that can be called on Elements and - ElementTrees. + """XPath(self, path, namespaces=None, extensions=None, regexp=True) + A compiled XPath expression that can be called on Elements and ElementTrees. 
Besides the XPath expression, you can pass prefix-namespace mappings and extension functions to the constructor through the keyword arguments @@ -374,6 +401,7 @@ self._raise_parse_error() def __call__(self, _etree_or_element, **_variables): + "__call__(self, _etree_or_element, **_variables)" cdef xpath.xmlXPathObject* xpathObj cdef _Document document cdef _Element element @@ -414,8 +442,8 @@ _find_namespaces = re.compile('({[^}]+})').findall cdef class ETXPath(XPath): - """Special XPath class that supports the ElementTree {uri} notation for - namespaces. + """ETXPath(self, path, extensions=None, regexp=True) + Special XPath class that supports the ElementTree {uri} notation for namespaces. Note that this class does not accept the ``namespace`` keyword argument. All namespaces must be passed as part of the path string. Modified: lxml/trunk/src/lxml/xslt.pxi ============================================================================== --- lxml/trunk/src/lxml/xslt.pxi (original) +++ lxml/trunk/src/lxml/xslt.pxi Wed Feb 13 21:48:39 2008 @@ -167,16 +167,18 @@ # XSLT file/network access control cdef class XSLTAccessControl: - """Access control for XSLT: reading/writing files, directories and network - I/O. Access to a type of resource is granted or denied by passing any of - the following keyword arguments. All of them default to True to allow - access. - - * read_file - * write_file - * create_dir - * read_network - * write_network + """XSLTAccessControl(self, read_file=True, write_file=True, create_dir=True, read_network=True, write_network=True) + + Access control for XSLT: reading/writing files, directories and + network I/O. Access to a type of resource is granted or denied by + passing any of the following boolean keyword arguments. All of + them default to True to allow access. 
+ + - read_file + - write_file + - create_dir + - read_network + - write_network """ cdef xslt.xsltSecurityPrefs* _prefs def __init__(self, *, read_file=True, write_file=True, create_dir=True, @@ -252,16 +254,25 @@ cdef class XSLT: - """Turn a document into an XSLT object. + """XSLT(self, xslt_input, extensions=None, regexp=True, access_control=None) + + Turn an XSL document into an XSLT object. + + Calling this object on a tree or Element will execute the XSLT:: + + >>> transform = etree.XSLT(xsl_tree) + >>> result = transform(xml_tree) Keyword arguments of the constructor: - * regexp - enable exslt regular expression support in XPath (default: True) - * access_control - access restrictions for network or file system + - regexp: enable exslt regular expression support in XPath + (default: True) + - access_control: access restrictions for network or file + system (see `XSLTAccessControl`) - Keyword arguments of the XSLT run: - * profile_run - enable XSLT profiling + Keyword arguments of the XSLT call: + - profile_run: enable XSLT profiling (default: False) - Other keyword arguments are passed to the stylesheet. + Other keyword arguments of the call are passed to the stylesheet. """ cdef _XSLTContext _context cdef xslt.xsltStylesheet* _c_style @@ -328,14 +339,22 @@ xslt.xsltFreeStylesheet(self._c_style) property error_log: + "The log of errors and warnings of an XSLT execution." def __get__(self): return self._error_log.copy() def apply(self, _input, *, profile_run=False, **_kw): + """apply(self, _input, profile_run=False, **_kw) + + :deprecated: call the object, not this method.""" return self(_input, profile_run=profile_run, **_kw) def tostring(self, _ElementTree result_tree): - """Save result doc to string based on stylesheet output method. + """tostring(self, result_tree) + + Save result doc to string based on stylesheet output method. + + :deprecated: use str(result_tree) instead. 
""" return str(result_tree) @@ -346,6 +365,14 @@ return _copyXSLT(self) def __call__(self, _input, *, profile_run=False, **_kw): + """__call__(self, _input, profile_run=False, **_kw) + + Execute the XSL transformation on a tree or Element. + + Pass the ``profile_run`` option to get profile information + about the XSLT. The result of the XSLT will have a property + xslt_profile that holds an XML tree with profiling data. + """ cdef _XSLTContext context cdef _XSLTResolverContext resolver_context cdef _Document input_doc From scoder at codespeak.net Wed Feb 13 21:48:44 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Wed, 13 Feb 2008 21:48:44 +0100 (CET) Subject: [Lxml-checkins] r51453 - in lxml/trunk: . src/lxml Message-ID: <20080213204844.7A2EB168406@codespeak.net> Author: scoder Date: Wed Feb 13 21:48:43 2008 New Revision: 51453 Modified: lxml/trunk/ (props changed) lxml/trunk/src/lxml/lxml.pyclasslookup.pyx Log: r3464 at delle: sbehnel | 2008-02-13 16:30:01 +0100 one more signature Modified: lxml/trunk/src/lxml/lxml.pyclasslookup.pyx ============================================================================== --- lxml/trunk/src/lxml/lxml.pyclasslookup.pyx (original) +++ lxml/trunk/src/lxml/lxml.pyclasslookup.pyx Wed Feb 13 21:48:43 2008 @@ -238,7 +238,8 @@ cdef class PythonElementClassLookup(FallbackElementClassLookup): - """Element class lookup based on a subclass method. + """PythonElementClassLookup(self, fallback=None) + Element class lookup based on a subclass method. To use it, inherit from this class and override the lookup method to lookup the element class for a node:: From scoder at codespeak.net Wed Feb 13 21:48:48 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Wed, 13 Feb 2008 21:48:48 +0100 (CET) Subject: [Lxml-checkins] r51454 - in lxml/trunk: . 
src/lxml src/lxml/html Message-ID: <20080213204848.8D998168407@codespeak.net> Author: scoder Date: Wed Feb 13 21:48:48 2008 New Revision: 51454 Removed: lxml/trunk/src/lxml/htmlbuilder.py Modified: lxml/trunk/ (props changed) lxml/trunk/CHANGES.txt lxml/trunk/src/lxml/html/builder.py Log: r3465 at delle: sbehnel | 2008-02-13 17:21:12 +0100 removed redundant module lxml.htmlbuilder (duplicate of lxml.html.builder) Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Wed Feb 13 21:48:48 2008 @@ -14,6 +14,9 @@ Bugs fixed ---------- +* The module ``lxml.html.builder`` was duplicated as + ``lxml.htmlbuilder`` + * Setting an element slice in objectify could insert slice-overlapping elements at the wrong position. Modified: lxml/trunk/src/lxml/html/builder.py ============================================================================== --- lxml/trunk/src/lxml/html/builder.py (original) +++ lxml/trunk/src/lxml/html/builder.py Wed Feb 13 21:48:48 2008 @@ -1,3 +1,31 @@ +# -------------------------------------------------------------------- +# The ElementTree toolkit is +# +# Copyright (c) 1999-2004 by Fredrik Lundh +# +# By obtaining, using, and/or copying this software and/or its +# associated documentation, you agree that you have read, understood, +# and will comply with the following terms and conditions: +# +# Permission to use, copy, modify, and distribute this software and +# its associated documentation for any purpose and without fee is +# hereby granted, provided that the above copyright notice appears in +# all copies, and that both that copyright notice and this permission +# notice appear in supporting documentation, and that the name of +# Secret Labs AB or the author not be used in advertising or publicity +# pertaining to distribution of the software without specific, written +# prior permission. 
+# +# SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD +# TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT- +# ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR +# BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY +# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, +# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS +# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE +# OF THIS SOFTWARE. +# -------------------------------------------------------------------- + """ HTML specialisation of ``builder.py`` by Fredrik Lundh Deleted: /lxml/trunk/src/lxml/htmlbuilder.py ============================================================================== --- /lxml/trunk/src/lxml/htmlbuilder.py Wed Feb 13 21:48:48 2008 +++ (empty file) @@ -1,154 +0,0 @@ -# -# HTML specialisation of ``builder.py`` by Fredrik Lundh -# -# -------------------------------------------------------------------- -# The ElementTree toolkit is -# -# Copyright (c) 1999-2004 by Fredrik Lundh -# -# By obtaining, using, and/or copying this software and/or its -# associated documentation, you agree that you have read, understood, -# and will comply with the following terms and conditions: -# -# Permission to use, copy, modify, and distribute this software and -# its associated documentation for any purpose and without fee is -# hereby granted, provided that the above copyright notice appears in -# all copies, and that both that copyright notice and this permission -# notice appear in supporting documentation, and that the name of -# Secret Labs AB or the author not be used in advertising or publicity -# pertaining to distribution of the software without specific, written -# prior permission. -# -# SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD -# TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT- -# ABILITY AND FITNESS. 
IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR -# BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY -# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, -# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS -# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE -# OF THIS SOFTWARE. -# -------------------------------------------------------------------- - -""" -Usage:: - - >>> from lxml.htmlbuilder import * - >>> html = HTML( - ... HEAD( TITLE("Hello World") ), - ... BODY( CLASS("main"), - ... H1("Hello World !") - ... ) - ... ) - - >>> import lxml.etree - >>> print lxml.etree.tostring(html, pretty_print=True) - - - Hello World - - -

<h1>Hello World !</h1>

- - - -""" - -from builder import E - -# elements -A = E.a # anchor -ABBR = E.abbr # abbreviated form (e.g., WWW, HTTP, etc.) -ACRONYM = E.acronym # -ADDRESS = E.address # information on author -APPLET = E.applet # Java applet (DEPRECATED) -AREA = E.area # client-side image map area -B = E.b # bold text style -BASE = E.base # document base URI -BASEFONT = E.basefont # base font size (DEPRECATED) -BDO = E.bdo # I18N BiDi over-ride -BIG = E.big # large text style -BLOCKQUOTE = E.blockquote # long quotation -BODY = E.body # document body -BR = E.br # forced line break -BUTTON = E.button # push button -CAPTION = E.caption # table caption -CENTER = E.center # shorthand for DIV align=center (DEPRECATED) -CITE = E.cite # citation -CODE = E.code # computer code fragment -COL = E.col # table column -COLGROUP = E.colgroup # table column group -DD = E.dd # definition description -DEL = getattr(E, 'del') # deleted text -DFN = E.dfn # instance definition -DIR = E.dir # directory list (DEPRECATED) -DIV = E.div # generic language/style container -DL = E.dl # definition list -DT = E.dt # definition term -EM = E.em # emphasis -FIELDSET = E.fieldset # form control group -FONT = E.font # local change to font (DEPRECATED) -FORM = E.form # interactive form -FRAME = E.frame # subwindow -FRAMESET = E.frameset # window subdivision -H1 = E.h1 # heading -H2 = E.h2 # heading -H3 = E.h3 # heading -H4 = E.h4 # heading -H5 = E.h5 # heading -H6 = E.h6 # heading -HEAD = E.head # document head -HR = E.hr # horizontal rule -HTML = E.html # document root element -I = E.i # italic text style -IFRAME = E.iframe # inline subwindow -IMG = E.img # Embedded image -INPUT = E.input # form control -INS = E.ins # inserted text -ISINDEX = E.isindex # single line prompt (DEPRECATED) -KBD = E.kbd # text to be entered by the user -LABEL = E.label # form field label text -LEGEND = E.legend # fieldset legend -LI = E.li # list item -LINK = E.link # a media-independent link -MAP = E.map # client-side image map -MENU 
= E.menu # menu list (DEPRECATED) -META = E.meta # generic metainformation -NOFRAMES = E.noframes # alternate content container for non frame-based rendering -NOSCRIPT = E.noscript # alternate content container for non script-based rendering -OBJECT = E.object # generic embedded object -OL = E.ol # ordered list -OPTGROUP = E.optgroup # option group -OPTION = E.option # selectable choice -P = E.p # paragraph -PARAM = E.param # named property value -PRE = E.pre # preformatted text -Q = E.q # short inline quotation -S = E.s # strike-through text style (DEPRECATED) -SAMP = E.samp # sample program output, scripts, etc. -SCRIPT = E.script # script statements -SELECT = E.select # option selector -SMALL = E.small # small text style -SPAN = E.span # generic language/style container -STRIKE = E.strike # strike-through text (DEPRECATED) -STRONG = E.strong # strong emphasis -STYLE = E.style # style info -SUB = E.sub # subscript -SUP = E.sup # superscript -TABLE = E.table # -TBODY = E.tbody # table body -TD = E.td # table data cell -TEXTAREA = E.textarea # multi-line text field -TFOOT = E.tfoot # table footer -TH = E.th # table header cell -THEAD = E.thead # table header -TITLE = E.title # document title -TR = E.tr # table row -TT = E.tt # teletype or monospaced text style -U = E.u # underlined text style (DEPRECATED) -UL = E.ul # unordered list -VAR = E.var # instance of a variable or program argument - -# attributes (only reserved words are included here) -ATTR = dict -def CLASS(v): return {'class': v} -def FOR(v): return {'for': v} From scoder at codespeak.net Wed Feb 13 21:48:53 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Wed, 13 Feb 2008 21:48:53 +0100 (CET) Subject: [Lxml-checkins] r51455 - in lxml/trunk: . 
src/lxml Message-ID: <20080213204853.1092816840A@codespeak.net> Author: scoder Date: Wed Feb 13 21:48:52 2008 New Revision: 51455 Modified: lxml/trunk/ (props changed) lxml/trunk/src/lxml/parser.pxi lxml/trunk/src/lxml/xslt.pxi Log: r3466 at delle: sbehnel | 2008-02-13 17:22:23 +0100 rst doc fixes Modified: lxml/trunk/src/lxml/parser.pxi ============================================================================== --- lxml/trunk/src/lxml/parser.pxi (original) +++ lxml/trunk/src/lxml/parser.pxi Wed Feb 13 21:48:52 2008 @@ -1473,22 +1473,24 @@ attribute default values are requested. Available boolean keyword arguments: - - attribute_defaults - read default attributes from DTD - - dtd_validation - validate (if DTD is available) - - load_dtd - use DTD for parsing - - no_network - prevent network access for related files (default: True) - - ns_clean - clean up redundant namespace declarations - - recover - try hard to parse through broken XML - - remove_blank_text - discard blank text nodes - - remove_comments - discard comments - - remove_pis - discard processing instructions - - compact - safe memory for short text content (default: True) - - resolve_entities - replace entities by their text value (default: True) + + - attribute_defaults - read default attributes from DTD + - dtd_validation - validate (if DTD is available) + - load_dtd - use DTD for parsing + - no_network - prevent network access for related files (default: True) + - ns_clean - clean up redundant namespace declarations + - recover - try hard to parse through broken XML + - remove_blank_text - discard blank text nodes + - remove_comments - discard comments + - remove_pis - discard processing instructions + - compact - safe memory for short text content (default: True) + - resolve_entities - replace entities by their text value (default: True) Other keyword arguments: - - encoding - override the document encoding - - target - a parser target object that will receive the parse events - - schema - an 
XMLSchema to validate against + + - encoding - override the document encoding + - target - a parser target object that will receive the parse events + - schema - an XMLSchema to validate against Note that you should avoid sharing parsers between threads. While this is not harmful, it is more efficient to use separate parsers. This does not @@ -1612,17 +1614,19 @@ this off. Available boolean keyword arguments: - - recover - try hard to parse through broken HTML (default: True) - - no_network - prevent network access for related files (default: True) - - remove_blank_text - discard empty text nodes - - remove_comments - discard comments - - remove_pis - discard processing instructions - - compact - safe memory for short text content (default: True) + + - recover - try hard to parse through broken HTML (default: True) + - no_network - prevent network access for related files (default: True) + - remove_blank_text - discard empty text nodes + - remove_comments - discard comments + - remove_pis - discard processing instructions + - compact - safe memory for short text content (default: True) Other keyword arguments: - - encoding - override the document encoding - - target - a parser target object that will receive the parse events - - schema - an XMLSchema to validate against + + - encoding - override the document encoding + - target - a parser target object that will receive the parse events + - schema - an XMLSchema to validate against Note that you should avoid sharing parsers between threads for performance reasons. Modified: lxml/trunk/src/lxml/xslt.pxi ============================================================================== --- lxml/trunk/src/lxml/xslt.pxi (original) +++ lxml/trunk/src/lxml/xslt.pxi Wed Feb 13 21:48:52 2008 @@ -174,11 +174,13 @@ passing any of the following boolean keyword arguments. All of them default to True to allow access. 
- - read_file - - write_file - - create_dir - - read_network - - write_network + - read_file + - write_file + - create_dir + - read_network + - write_network + + See `XSLT`. """ cdef xslt.xsltSecurityPrefs* _prefs def __init__(self, *, read_file=True, write_file=True, create_dir=True, @@ -264,15 +266,18 @@ >>> result = transform(xml_tree) Keyword arguments of the constructor: - - regexp: enable exslt regular expression support in XPath - (default: True) - - access_control: access restrictions for network or file - system (see `XSLTAccessControl`) + + - regexp: enable exslt regular expression support in XPath + (default: True) + - access_control: access restrictions for network or file + system (see `XSLTAccessControl`) Keyword arguments of the XSLT call: - - profile_run: enable XSLT profiling (default: False) - Other keyword arguments of the call are passed to the stylesheet. + - profile_run: enable XSLT profiling (default: False) + + Other keyword arguments of the call are passed to the stylesheet + as parameters. """ cdef _XSLTContext _context cdef xslt.xsltStylesheet* _c_style From scoder at codespeak.net Wed Feb 13 21:48:57 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Wed, 13 Feb 2008 21:48:57 +0100 (CET) Subject: [Lxml-checkins] r51456 - in lxml/trunk: . 
src/lxml/html Message-ID: <20080213204857.25C27168406@codespeak.net> Author: scoder Date: Wed Feb 13 21:48:56 2008 New Revision: 51456 Modified: lxml/trunk/ (props changed) lxml/trunk/src/lxml/html/builder.py Log: r3467 at delle: sbehnel | 2008-02-13 17:24:33 +0100 cleanup Modified: lxml/trunk/src/lxml/html/builder.py ============================================================================== --- lxml/trunk/src/lxml/html/builder.py (original) +++ lxml/trunk/src/lxml/html/builder.py Wed Feb 13 21:48:56 2008 @@ -1,29 +1,6 @@ # -------------------------------------------------------------------- # The ElementTree toolkit is -# # Copyright (c) 1999-2004 by Fredrik Lundh -# -# By obtaining, using, and/or copying this software and/or its -# associated documentation, you agree that you have read, understood, -# and will comply with the following terms and conditions: -# -# Permission to use, copy, modify, and distribute this software and -# its associated documentation for any purpose and without fee is -# hereby granted, provided that the above copyright notice appears in -# all copies, and that both that copyright notice and this permission -# notice appear in supporting documentation, and that the name of -# Secret Labs AB or the author not be used in advertising or publicity -# pertaining to distribution of the software without specific, written -# prior permission. -# -# SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD -# TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT- -# ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR -# BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY -# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, -# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS -# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE -# OF THIS SOFTWARE. 
# -------------------------------------------------------------------- """ From scoder at codespeak.net Wed Feb 13 21:49:02 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Wed, 13 Feb 2008 21:49:02 +0100 (CET) Subject: [Lxml-checkins] r51457 - in lxml/trunk: . src/lxml src/lxml/tests Message-ID: <20080213204902.07C7E16840B@codespeak.net> Author: scoder Date: Wed Feb 13 21:49:01 2008 New Revision: 51457 Modified: lxml/trunk/ (props changed) lxml/trunk/CHANGES.txt lxml/trunk/src/lxml/lxml.pyclasslookup.pyx lxml/trunk/src/lxml/tests/test_pyclasslookup.py Log: r3468 at delle: sbehnel | 2008-02-13 19:52:53 +0100 child iteration in lxml.pyclasslookup Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Wed Feb 13 21:49:01 2008 @@ -8,6 +8,8 @@ Features added -------------- +* Child iteration in ``lxml.pyclasslookup``. + * Docstrings now reflect the signature of functions and methods to make them visible in API docs and ``help()`` Modified: lxml/trunk/src/lxml/lxml.pyclasslookup.pyx ============================================================================== --- lxml/trunk/src/lxml/lxml.pyclasslookup.pyx (original) +++ lxml/trunk/src/lxml/lxml.pyclasslookup.pyx Wed Feb 13 21:49:01 2008 @@ -1,3 +1,29 @@ +""" +A whole-tree Element class lookup scheme for `lxml.etree`. + +This class lookup scheme allows access to the entire XML tree. To use +it, let a class inherit from `PythonElementClassLookup` and +re-implement the ``lookup(self, doc, root)`` method: + + >>> from lxml import etree, pyclasslookup + >>> + >>> class MyElementClass(etree.ElementBase): + ... honkey = True + ... + >>> class MyLookup(pyclasslookup.PythonElementClassLookup): + ... def lookup(self, doc, root): + ... if root.tag == "sometag": + ... return MyElementClass + ... else: + ... for child in root: + ... if child.tag == "someothertag": + ... 
return MyElementClass + ... # delegate to default + ... return None + +See http://codespeak.net/lxml/element_classes.html +""" + from etreepublic cimport _Document, _Element, ElementBase from etreepublic cimport ElementClassLookup, FallbackElementClassLookup from etreepublic cimport elementFactory, import_lxml__etree @@ -128,6 +154,9 @@ c_node = cetree.findChildBackwards(self._c_node, 0) return c_node != NULL + def __iter__(self): + return iter(self.getchildren()) + def get(self, key, default=None): """Gets an element attribute. """ @@ -258,6 +287,10 @@ self._lookup_function = _lookup_class def lookup(self, doc, element): + """lookup(self, doc, element) + + Override this method to implement your own lookup scheme. + """ return None cdef object _lookup_class(state, _Document doc, tree.xmlNode* c_node): Modified: lxml/trunk/src/lxml/tests/test_pyclasslookup.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_pyclasslookup.py (original) +++ lxml/trunk/src/lxml/tests/test_pyclasslookup.py Wed Feb 13 21:49:01 2008 @@ -245,6 +245,20 @@ self.assertEquals([ c.tag for c in root.getchildren() ], child_tags) + def test_lookup_iter_children(self): + el_class = self._buildElementClass() + el_class.CHILD_TAGS = None + def lookup(doc, el): + if el_class.CHILD_TAGS is None: + el_class.CHILD_TAGS = [ c.tag for c in el ] + return el_class + self._setClassLookup(lookup) + root = self.XML(xml_str) + child_tags = root.CHILD_TAGS + self.assertNotEquals(None, child_tags) + self.assertEquals([ c.tag for c in root.getchildren() ], + child_tags) + def test_lookup_getparent(self): el_class = self._buildElementClass() el_class.PARENT = None From scoder at codespeak.net Wed Feb 13 21:49:05 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Wed, 13 Feb 2008 21:49:05 +0100 (CET) Subject: [Lxml-checkins] r51458 - lxml/trunk Message-ID: <20080213204905.6C68D16840B@codespeak.net> Author: scoder Date: Wed Feb 13 
21:49:05 2008 New Revision: 51458 Modified: lxml/trunk/ (props changed) lxml/trunk/Makefile Log: r3469 at delle: sbehnel | 2008-02-13 19:54:11 +0100 removed private stuff from generated API documentation Modified: lxml/trunk/Makefile ============================================================================== --- lxml/trunk/Makefile (original) +++ lxml/trunk/Makefile Wed Feb 13 21:49:05 2008 @@ -42,10 +42,11 @@ rm -fr doc/html/api @[ -x "`which epydoc`" ] \ && (cd src && echo "Generating API docs ..." && \ - PYTHONPATH=. epydoc -v --docformat "restructuredtext en" -o ../doc/html/api --name lxml --url http://codespeak.net/lxml/ lxml/) \ + PYTHONPATH=. epydoc -v --docformat "restructuredtext en" \ + -o ../doc/html/api --no-private --exclude='[.]html[.]tests|[.]_' \ + --name lxml --url http://codespeak.net/lxml/ lxml/) \ || (echo "not generating epydoc API documentation") -# XXX What should the default be? test: test_inplace valtest: valgrind_test_inplace From scoder at codespeak.net Wed Feb 13 21:49:13 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Wed, 13 Feb 2008 21:49:13 +0100 (CET) Subject: [Lxml-checkins] r51459 - in lxml/trunk: . 
src/lxml src/lxml/html src/lxml/tests Message-ID: <20080213204913.46859168407@codespeak.net> Author: scoder Date: Wed Feb 13 21:49:12 2008 New Revision: 51459 Modified: lxml/trunk/ (props changed) lxml/trunk/src/lxml/ElementInclude.py lxml/trunk/src/lxml/builder.py lxml/trunk/src/lxml/cssselect.py lxml/trunk/src/lxml/doctestcompare.py lxml/trunk/src/lxml/html/__init__.py lxml/trunk/src/lxml/html/builder.py lxml/trunk/src/lxml/html/clean.py lxml/trunk/src/lxml/lxml.etree.pyx lxml/trunk/src/lxml/lxml.objectify.pyx lxml/trunk/src/lxml/sax.py lxml/trunk/src/lxml/tests/__init__.py Log: r3470 at delle: sbehnel | 2008-02-13 20:12:19 +0100 loads of docstrings Modified: lxml/trunk/src/lxml/ElementInclude.py ============================================================================== --- lxml/trunk/src/lxml/ElementInclude.py (original) +++ lxml/trunk/src/lxml/ElementInclude.py Wed Feb 13 21:49:12 2008 @@ -41,9 +41,14 @@ # OF THIS SOFTWARE. # -------------------------------------------------------------------- -## -# Limited XInclude support for the ElementTree package. -## +""" +Limited XInclude support for the ElementTree package. + +While lxml.etree has full support for XInclude (see +`etree.ElementTree.xinclude()`), this module provides a simpler, pure +Python, ElementTree compatible implementation that supports a simple +form of custom URL resolvers. +""" import copy, etree from urlparse import urljoin Modified: lxml/trunk/src/lxml/builder.py ============================================================================== --- lxml/trunk/src/lxml/builder.py (original) +++ lxml/trunk/src/lxml/builder.py Wed Feb 13 21:49:12 2008 @@ -33,6 +33,10 @@ # OF THIS SOFTWARE. # -------------------------------------------------------------------- +""" +The ``E`` Element factory for generating XML documents. 
+""" + import etree as ET try: Modified: lxml/trunk/src/lxml/cssselect.py ============================================================================== --- lxml/trunk/src/lxml/cssselect.py (original) +++ lxml/trunk/src/lxml/cssselect.py Wed Feb 13 21:49:12 2008 @@ -1,3 +1,9 @@ +"""CSS Selectors based on XPath. + +This module supports selecting XML/HTML tags based on CSS selectors. +See the `CSSSelector` class for details. +""" + import re from lxml import etree @@ -11,7 +17,17 @@ pass class CSSSelector(etree.XPath): + """A CSS selector. + + Usage:: + >>> from lxml import etree, cssselect + >>> select = cssselect.CSSSelector("a tag > child") + + >>> root = etree.XML("TEXT") + >>> [ el.tag for el in select(root) ] + ['child'] + """ def __init__(self, css): path = css_to_xpath(css) etree.XPath.__init__(self, path) @@ -575,9 +591,8 @@ self.condition = other.condition class XPathExprOr(XPathExpr): - """ - Represents on |'d expressions. Note that unfortunately it isn't + Represents |'d expressions. Note that unfortunately it isn't the union, it's the sum, so duplicate elements will appear. """ Modified: lxml/trunk/src/lxml/doctestcompare.py ============================================================================== --- lxml/trunk/src/lxml/doctestcompare.py (original) +++ lxml/trunk/src/lxml/doctestcompare.py Wed Feb 13 21:49:12 2008 @@ -1,8 +1,16 @@ """ lxml-based doctest output comparison. -To use this you must call ``lxmldoctest.install()``, which will cause -doctest to use this in all subsequent calls. +Note: normally, you should just import the `lxml.usedoctest` and +`lxml.html.usedoctest` modules from within a doctest, instead of this +one:: + + >>> import lxml.usedoctest # for XML output + + >>> import lxml.html.usedoctest # for HTML output + +To use this module directly, you must call ``lxmldoctest.install()``, +which will cause doctest to use this in all subsequent calls. 
This changes the way output is checked and comparisons are made for XML or HTML-like content. Modified: lxml/trunk/src/lxml/html/__init__.py ============================================================================== --- lxml/trunk/src/lxml/html/__init__.py (original) +++ lxml/trunk/src/lxml/html/__init__.py Wed Feb 13 21:49:12 2008 @@ -1,3 +1,6 @@ +"""The ``lxml.html`` tool set for HTML handling. +""" + import threading import re import urlparse Modified: lxml/trunk/src/lxml/html/builder.py ============================================================================== --- lxml/trunk/src/lxml/html/builder.py (original) +++ lxml/trunk/src/lxml/html/builder.py Wed Feb 13 21:49:12 2008 @@ -4,7 +4,7 @@ # -------------------------------------------------------------------- """ -HTML specialisation of ``builder.py`` by Fredrik Lundh +A set of HTML generator tags for building HTML documents. Usage:: Modified: lxml/trunk/src/lxml/html/clean.py ============================================================================== --- lxml/trunk/src/lxml/html/clean.py (original) +++ lxml/trunk/src/lxml/html/clean.py Wed Feb 13 21:49:12 2008 @@ -1,3 +1,9 @@ +"""A cleanup tool for HTML. + +Removes unwanted tags and content. See the `Cleaner` class for +details. +""" + import re import copy import urlparse Modified: lxml/trunk/src/lxml/lxml.etree.pyx ============================================================================== --- lxml/trunk/src/lxml/lxml.etree.pyx (original) +++ lxml/trunk/src/lxml/lxml.etree.pyx Wed Feb 13 21:49:12 2008 @@ -1,5 +1,5 @@ -"""The lxml.etree module implements the extended ElementTree API for -XML. +"""The ``lxml.etree`` module implements the extended ElementTree API +for XML. 
""" __docformat__ = "restructuredtext en" Modified: lxml/trunk/src/lxml/lxml.objectify.pyx ============================================================================== --- lxml/trunk/src/lxml/lxml.objectify.pyx (original) +++ lxml/trunk/src/lxml/lxml.objectify.pyx Wed Feb 13 21:49:12 2008 @@ -1,3 +1,7 @@ +"""The ``lxml.objectify`` module implements a Python object API for +XML. It is based on `lxml.etree`. +""" + from etreepublic cimport _Document, _Element, ElementBase from etreepublic cimport _ElementIterator, ElementClassLookup from etreepublic cimport elementFactory, import_lxml__etree, textOf Modified: lxml/trunk/src/lxml/sax.py ============================================================================== --- lxml/trunk/src/lxml/sax.py (original) +++ lxml/trunk/src/lxml/sax.py Wed Feb 13 21:49:12 2008 @@ -1,3 +1,15 @@ +""" +SAX-based adapter to copy trees from/to the Python standard library. + +Use the `ElementTreeContentHandler` class to build an ElementTree from +SAX events. + +Use the `ElementTreeProducer` class or the `saxify()` function to fire +the SAX events of an ElementTree against a SAX ContentHandler. + +See http://codespeak.net/lxml/sax.html +""" + from xml.sax.handler import ContentHandler import etree from etree import ElementTree, SubElement @@ -220,4 +232,7 @@ return prefix + ':' + local_name def saxify(element_or_tree, content_handler): + """One-shot helper to generate SAX events from an XML tree and fire + them against a SAX ContentHandler. + """ return ElementTreeProducer(element_or_tree, content_handler).saxify() Modified: lxml/trunk/src/lxml/tests/__init__.py ============================================================================== --- lxml/trunk/src/lxml/tests/__init__.py (original) +++ lxml/trunk/src/lxml/tests/__init__.py Wed Feb 13 21:49:12 2008 @@ -1,2 +1,4 @@ -# this is a package +""" +The lxml test suite for lxml, ElementTree and cElementTree. 
+""" From scoder at codespeak.net Wed Feb 13 21:49:16 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Wed, 13 Feb 2008 21:49:16 +0100 (CET) Subject: [Lxml-checkins] r51460 - lxml/trunk Message-ID: <20080213204916.11E9416840D@codespeak.net> Author: scoder Date: Wed Feb 13 21:49:16 2008 New Revision: 51460 Modified: lxml/trunk/ (props changed) lxml/trunk/CHANGES.txt Log: r3471 at delle: sbehnel | 2008-02-13 20:29:49 +0100 changelog Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Wed Feb 13 21:49:16 2008 @@ -10,8 +10,8 @@ * Child iteration in ``lxml.pyclasslookup``. -* Docstrings now reflect the signature of functions and methods to - make them visible in API docs and ``help()`` +* Loads of new docstrings reflect the signature of functions and + methods to make them visible in API docs and ``help()`` Bugs fixed ---------- From scoder at codespeak.net Wed Feb 13 21:49:22 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Wed, 13 Feb 2008 21:49:22 +0100 (CET) Subject: [Lxml-checkins] r51461 - in lxml/trunk: . 
src/lxml/html Message-ID: <20080213204922.2B339168406@codespeak.net> Author: scoder Date: Wed Feb 13 21:49:21 2008 New Revision: 51461 Added: lxml/trunk/src/lxml/html/_setmixin.py - copied unchanged from r50752, lxml/trunk/src/lxml/html/setmixin.py Removed: lxml/trunk/src/lxml/html/setmixin.py Modified: lxml/trunk/ (props changed) lxml/trunk/CHANGES.txt lxml/trunk/src/lxml/html/__init__.py Log: r3472 at delle: sbehnel | 2008-02-13 20:31:51 +0100 renamed lxml.html.setmixin to _setmixin to make clear it's not a real part of lxml Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Wed Feb 13 21:49:21 2008 @@ -25,6 +25,11 @@ Other changes ------------- +* The previously public module ``lxml.html.setmixin`` was renamed to + ``lxml.html._setmixin`` as it is not an official part of lxml. If + you want to use it, feel free to copy it over to your own source + base. + * Passing ``--with-xslt-config=/path/to/xslt-config`` to setup.py will override the ``xslt-config`` script that is used to determine the C compiler options. Modified: lxml/trunk/src/lxml/html/__init__.py ============================================================================== --- lxml/trunk/src/lxml/html/__init__.py (original) +++ lxml/trunk/src/lxml/html/__init__.py Wed Feb 13 21:49:21 2008 @@ -8,7 +8,7 @@ from lxml import etree from lxml.html import defs from lxml import cssselect -from lxml.html.setmixin import SetMixin +from lxml.html._setmixin import SetMixin try: from UserDict import DictMixin except ImportError: Deleted: /lxml/trunk/src/lxml/html/setmixin.py ============================================================================== --- /lxml/trunk/src/lxml/html/setmixin.py Wed Feb 13 21:49:21 2008 +++ (empty file) @@ -1,115 +0,0 @@ -class SetMixin(object): - - """ - Mix-in for sets. 
You must define __iter__, add, remove - """ - - def __len__(self): - length = 0 - for item in self: - length += 1 - return length - - def __contains__(self, item): - for has_item in self: - if item == has_item: - return True - return False - - def issubset(self, other): - for item in other: - if item not in self: - return False - return True - - __le__ = issubset - - def issuperset(self, other): - for item in self: - if item not in other: - return False - return True - - __ge__ = issuperset - - def union(self, other): - return self | other - - def __or__(self, other): - new = self.copy() - new |= other - return new - - def intersection(self, other): - return self & other - - def __and__(self, other): - new = self.copy() - new &= other - return new - - def difference(self, other): - return self - other - - def __sub__(self, other): - new = self.copy() - new -= other - return new - - def symmetric_difference(self, other): - return self ^ other - - def __xor__(self, other): - new = self.copy() - new ^= other - return new - - def copy(self): - return set(self) - - def update(self, other): - for item in other: - self.add(item) - - def __ior__(self, other): - self.update(other) - return self - - def intersection_update(self, other): - for item in self: - if item not in other: - self.remove(item) - - def __iand__(self, other): - self.intersection_update(other) - return self - - def difference_update(self, other): - for item in other: - if item in self: - self.remove(item) - - def __isub__(self, other): - self.difference_update(other) - return self - - def symmetric_difference_update(self, other): - for item in other: - if item in self: - self.remove(item) - else: - self.add(item) - - def __ixor__(self, other): - self.symmetric_difference_update(other) - return self - - def discard(self, item): - try: - self.remove(item) - except KeyError: - pass - - def clear(self): - for item in list(self): - self.remove(item) From scoder at codespeak.net Wed Feb 13 21:49:39 2008 From: 
scoder at codespeak.net (scoder at codespeak.net) Date: Wed, 13 Feb 2008 21:49:39 +0100 (CET) Subject: [Lxml-checkins] r51462 - lxml/trunk Message-ID: <20080213204939.5CE8116840A@codespeak.net> Author: scoder Date: Wed Feb 13 21:49:38 2008 New Revision: 51462 Modified: lxml/trunk/ (props changed) lxml/trunk/CHANGES.txt Log: r3484 at delle: sbehnel | 2008-02-13 21:48:51 +0100 changelog fix Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Wed Feb 13 21:49:38 2008 @@ -19,6 +19,10 @@ * The module ``lxml.html.builder`` was duplicated as ``lxml.htmlbuilder`` +* Form elements would return None for ``form.fields.keys()`` if there + was an unnamed input field. Now unnamed input fields are completely + ignored. + * Setting an element slice in objectify could insert slice-overlapping elements at the wrong position. From scoder at codespeak.net Wed Feb 13 22:31:38 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Wed, 13 Feb 2008 22:31:38 +0100 (CET) Subject: [Lxml-checkins] r51465 - in lxml/trunk: . doc Message-ID: <20080213213138.B601A168411@codespeak.net> Author: scoder Date: Wed Feb 13 22:31:37 2008 New Revision: 51465 Modified: lxml/trunk/ (props changed) lxml/trunk/CHANGES.txt lxml/trunk/doc/main.txt Log: r3489 at delle: sbehnel | 2008-02-13 22:30:41 +0100 release date of 2.0.1 Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Wed Feb 13 22:31:37 2008 @@ -2,8 +2,8 @@ lxml changelog ============== -2.0.1 (Under development) -========================= +2.0.1 (2008-02-13) +================== Features added -------------- @@ -57,10 +57,6 @@ Bugs fixed ---------- -* Form elements would return None for ``form.fields.keys()`` if there - was an unnamed input field. 
Now unnamed input fields are completely - ignored. - Other changes ------------- Modified: lxml/trunk/doc/main.txt ============================================================================== --- lxml/trunk/doc/main.txt (original) +++ lxml/trunk/doc/main.txt Wed Feb 13 22:31:37 2008 @@ -145,7 +145,7 @@ .. _`lxml at the Python Package Index`: http://pypi.python.org/pypi/lxml/ .. _`this key`: pubkey.asc -The latest version is `lxml 2.0.1`_, released 2008-02-08 +The latest version is `lxml 2.0.1`_, released 2008-02-13 (`changes for 2.0.1`_). `Older versions`_ are listed below. .. _`Older versions`: #old-versions From scoder at codespeak.net Wed Feb 13 22:35:36 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Wed, 13 Feb 2008 22:35:36 +0100 (CET) Subject: [Lxml-checkins] r51466 - lxml/trunk Message-ID: <20080213213536.B1A12168411@codespeak.net> Author: scoder Date: Wed Feb 13 22:35:34 2008 New Revision: 51466 Modified: lxml/trunk/ (props changed) lxml/trunk/CHANGES.txt Log: r3493 at delle: sbehnel | 2008-02-13 22:34:59 +0100 changelog Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Wed Feb 13 22:35:34 2008 @@ -29,6 +29,9 @@ Other changes ------------- +* The generated API documentation was cleaned up and disburdened from + non-public classes etc. + * The previously public module ``lxml.html.setmixin`` was renamed to ``lxml.html._setmixin`` as it is not an official part of lxml. 
If you want to use it, feel free to copy it over to your own source From scoder at codespeak.net Wed Feb 13 23:02:17 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Wed, 13 Feb 2008 23:02:17 +0100 (CET) Subject: [Lxml-checkins] r51467 - lxml/tag/lxml-2.0.1 Message-ID: <20080213220217.1BB4E168406@codespeak.net> Author: scoder Date: Wed Feb 13 23:02:17 2008 New Revision: 51467 Added: lxml/tag/lxml-2.0.1/ - copied from r51466, lxml/trunk/ Log: tag for lxml 2.0.1 From scoder at codespeak.net Thu Feb 14 09:19:44 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Thu, 14 Feb 2008 09:19:44 +0100 (CET) Subject: [Lxml-checkins] r51473 - in lxml/trunk: . src/lxml src/lxml/tests Message-ID: <20080214081944.1629E168411@codespeak.net> Author: scoder Date: Thu Feb 14 09:19:43 2008 New Revision: 51473 Modified: lxml/trunk/ (props changed) lxml/trunk/src/lxml/lxml.pyclasslookup.pyx lxml/trunk/src/lxml/tests/test_pyclasslookup.py Log: r3497 at delle: sbehnel | 2008-02-14 09:17:23 +0100 iterchildren() method in lxml.pyclasslookup, faster proxy instantiation Modified: lxml/trunk/src/lxml/lxml.pyclasslookup.pyx ============================================================================== --- lxml/trunk/src/lxml/lxml.pyclasslookup.pyx (original) +++ lxml/trunk/src/lxml/lxml.pyclasslookup.pyx Thu Feb 14 09:19:43 2008 @@ -1,9 +1,10 @@ """ A whole-tree Element class lookup scheme for `lxml.etree`. -This class lookup scheme allows access to the entire XML tree. To use -it, let a class inherit from `PythonElementClassLookup` and -re-implement the ``lookup(self, doc, root)`` method: +This class lookup scheme allows access to the entire XML tree in +read-only mode. To use it, let a class inherit from +`PythonElementClassLookup` and re-implement the ``lookup(self, doc, +root)`` method: >>> from lxml import etree, pyclasslookup >>> @@ -21,6 +22,15 @@ ... # delegate to default ... return None +Note that the API of the Element objects is not complete. 
It is +purely read-only and does not support all features of the normal +`lxml.etree` API (such as XPath, extended slicing or some iteration +methods). + +Also, you cannot wrap such a read-only Element in an ElementTree, and +you must take care not to keep a reference to them outside of the +`lookup()` method. + See http://codespeak.net/lxml/element_classes.html """ @@ -43,6 +53,7 @@ __version__ = etree.__version__ cdef class _ElementProxy: + "The main read-only Element proxy class (for internal use only!)." cdef tree.xmlNode* _c_node cdef object _source_proxy cdef object _dependent_proxies @@ -157,6 +168,18 @@ def __iter__(self): return iter(self.getchildren()) + def iterchildren(self, tag=None, *, reversed=False): + """iterchildren(self, tag=None, reversed=False) + + Iterate over the children of this element. + """ + children = self.getchildren() + if tag is not None: + children = [ el for el in children if el.tag == tag ] + if reversed: + children = children[::-1] + return iter(children) + def get(self, key, default=None): """Gets an element attribute. 
""" @@ -230,15 +253,21 @@ return _newProxy(self._source_proxy, c_node) return None + +cdef extern from "etree_defs.h": + # macro call to 't->tp_new()' for fast instantiation + cdef _ElementProxy NEW_PROXY "PY_NEW" (object t) + cdef _ElementProxy _newProxy(_ElementProxy sourceProxy, tree.xmlNode* c_node): cdef _ElementProxy el - el = _ElementProxy() + el = NEW_PROXY(_ElementProxy) el._c_node = c_node if sourceProxy is None: - sourceProxy = el - el._dependent_proxies = [] - el._source_proxy = sourceProxy - python.PyList_Append(sourceProxy._dependent_proxies, el) + el._source_proxy = el + el._dependent_proxies = [el] + else: + el._source_proxy = sourceProxy + python.PyList_Append(sourceProxy._dependent_proxies, el) return el cdef _freeProxies(_ElementProxy sourceProxy): Modified: lxml/trunk/src/lxml/tests/test_pyclasslookup.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_pyclasslookup.py (original) +++ lxml/trunk/src/lxml/tests/test_pyclasslookup.py Thu Feb 14 09:19:43 2008 @@ -259,6 +259,43 @@ self.assertEquals([ c.tag for c in root.getchildren() ], child_tags) + def test_lookup_iterchildren(self): + el_class = self._buildElementClass() + el_class.CHILD_TAGS = None + def lookup(doc, el): + if el_class.CHILD_TAGS is None: + el_class.CHILD_TAGS = [ c.tag for c in el.iterchildren() ] + return el_class + self._setClassLookup(lookup) + root = self.XML(xml_str) + child_tags = root.CHILD_TAGS + self.assertNotEquals(None, child_tags) + self.assertEquals([ c.tag for c in root.getchildren() ], + child_tags) + + def test_lookup_iterchildren_tag(self): + el_class = self._buildElementClass() + el_class.CHILD_TAGS = None + def lookup(doc, el): + if not el_class.CHILD_TAGS: + el_class.CHILD_TAGS = [ + c.tag for c in el.iterchildren(tag='{objectified}c2') ] + return el_class + self._setClassLookup(lookup) + + root = self.XML(xml_str) + child_tags = root.CHILD_TAGS + self.assertNotEquals(None, child_tags) + 
self.assertEquals([], child_tags) + + c1 = root[0] + child_tags = root.CHILD_TAGS + self.assertNotEquals(None, child_tags) + self.assertNotEquals([], child_tags) + self.assertEquals( + [ c.tag for c in root[0].iterchildren(tag='{objectified}c2') ], + child_tags) + def test_lookup_getparent(self): el_class = self._buildElementClass() el_class.PARENT = None From scoder at codespeak.net Thu Feb 14 09:19:48 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Thu, 14 Feb 2008 09:19:48 +0100 (CET) Subject: [Lxml-checkins] r51474 - in lxml/trunk: . doc Message-ID: <20080214081948.23218168412@codespeak.net> Author: scoder Date: Thu Feb 14 09:19:47 2008 New Revision: 51474 Modified: lxml/trunk/ (props changed) lxml/trunk/doc/FAQ.txt Log: r3498 at delle: sbehnel | 2008-02-14 09:18:57 +0100 link to a new lxml tutorial Modified: lxml/trunk/doc/FAQ.txt ============================================================================== --- lxml/trunk/doc/FAQ.txt (original) +++ lxml/trunk/doc/FAQ.txt Thu Feb 14 09:19:47 2008 @@ -57,19 +57,23 @@ Is there a tutorial? -------------------- -Read the `lxml.etree Tutorial`_. While this is still work in progress (just -as any good documentation), it provides an overview of the most important -concepts in ``lxml.etree``. If you want to help out, the tutorial is a very -good place to start. +Read the `lxml.etree Tutorial`_. While this is still work in progress +(just as any good documentation), it provides an overview of the most +important concepts in ``lxml.etree``. If you want to help out, +improving the tutorial is a very good place to start. There is also a `tutorial for ElementTree`_ which works for ``lxml.etree``. The `API documentation`_ also contains many examples for ``lxml.etree``. To learn using ``lxml.objectify``, read the `objectify documentation`_. +John Shipman has written another tutorial called `Python XML +processing with lxml`_ that contains lots of examples. + .. _`lxml.etree Tutorial`: tutorial.html .. 
_`tutorial for ElementTree`: http://effbot.org/zone/element.htm .. _`API documentation`: api.html .. _`objectify documentation`: objectify.html +.. _`Python XML processing with lxml`: http://www.nmt.edu/tcc/help/pubs/pylxml/ Where can I find more documentation about lxml? From scoder at codespeak.net Thu Feb 14 09:44:26 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Thu, 14 Feb 2008 09:44:26 +0100 (CET) Subject: [Lxml-checkins] r51475 - in lxml/trunk: . src/lxml Message-ID: <20080214084426.E809E168412@codespeak.net> Author: scoder Date: Thu Feb 14 09:44:25 2008 New Revision: 51475 Modified: lxml/trunk/ (props changed) lxml/trunk/src/lxml/lxml.pyclasslookup.pyx Log: r3501 at delle: sbehnel | 2008-02-14 09:30:32 +0100 use 'cpdef' for internally used method in lxml.pyclasslookup Modified: lxml/trunk/src/lxml/lxml.pyclasslookup.pyx ============================================================================== --- lxml/trunk/src/lxml/lxml.pyclasslookup.pyx (original) +++ lxml/trunk/src/lxml/lxml.pyclasslookup.pyx Thu Feb 14 09:44:25 2008 @@ -207,7 +207,7 @@ self._assertNode() return cetree.collectAttributes(self._c_node, 3) - def getchildren(self): + cpdef getchildren(self): """Returns all subelements. The elements are returned in document order. """ From scoder at codespeak.net Thu Feb 14 15:52:31 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Thu, 14 Feb 2008 15:52:31 +0100 (CET) Subject: [Lxml-checkins] r51487 - in lxml/branch/lxml-2.0: . 
doc src/lxml src/lxml/html src/lxml/html/tests src/lxml/tests Message-ID: <20080214145231.26BA8168412@codespeak.net> Author: scoder Date: Thu Feb 14 15:52:28 2008 New Revision: 51487 Added: lxml/branch/lxml-2.0/src/lxml/html/_setmixin.py - copied unchanged from r51486, lxml/trunk/src/lxml/html/_setmixin.py Removed: lxml/branch/lxml-2.0/src/lxml/html/setmixin.py lxml/branch/lxml-2.0/src/lxml/htmlbuilder.py Modified: lxml/branch/lxml-2.0/CHANGES.txt lxml/branch/lxml-2.0/Makefile lxml/branch/lxml-2.0/doc/FAQ.txt lxml/branch/lxml-2.0/doc/build.txt lxml/branch/lxml-2.0/doc/main.txt lxml/branch/lxml-2.0/doc/performance.txt lxml/branch/lxml-2.0/setup.py lxml/branch/lxml-2.0/setupinfo.py lxml/branch/lxml-2.0/src/lxml/ElementInclude.py lxml/branch/lxml-2.0/src/lxml/builder.py lxml/branch/lxml-2.0/src/lxml/classlookup.pxi lxml/branch/lxml-2.0/src/lxml/cssselect.py lxml/branch/lxml-2.0/src/lxml/docloader.pxi lxml/branch/lxml-2.0/src/lxml/doctestcompare.py lxml/branch/lxml-2.0/src/lxml/dtd.pxi lxml/branch/lxml-2.0/src/lxml/extensions.pxi lxml/branch/lxml-2.0/src/lxml/html/__init__.py lxml/branch/lxml-2.0/src/lxml/html/builder.py lxml/branch/lxml-2.0/src/lxml/html/clean.py lxml/branch/lxml-2.0/src/lxml/html/diff.py lxml/branch/lxml-2.0/src/lxml/html/tests/test_diff.txt lxml/branch/lxml-2.0/src/lxml/html/tests/test_forms.txt lxml/branch/lxml-2.0/src/lxml/html/usedoctest.py lxml/branch/lxml-2.0/src/lxml/iterparse.pxi lxml/branch/lxml-2.0/src/lxml/lxml.etree.pyx lxml/branch/lxml-2.0/src/lxml/lxml.objectify.pyx lxml/branch/lxml-2.0/src/lxml/lxml.pyclasslookup.pyx lxml/branch/lxml-2.0/src/lxml/nsclasses.pxi lxml/branch/lxml-2.0/src/lxml/objectpath.pxi lxml/branch/lxml-2.0/src/lxml/parser.pxi lxml/branch/lxml-2.0/src/lxml/relaxng.pxi lxml/branch/lxml-2.0/src/lxml/sax.py lxml/branch/lxml-2.0/src/lxml/schematron.pxi lxml/branch/lxml-2.0/src/lxml/tests/__init__.py lxml/branch/lxml-2.0/src/lxml/tests/common_imports.py lxml/branch/lxml-2.0/src/lxml/tests/test_elementtree.py 
lxml/branch/lxml-2.0/src/lxml/tests/test_objectify.py lxml/branch/lxml-2.0/src/lxml/tests/test_pyclasslookup.py lxml/branch/lxml-2.0/src/lxml/usedoctest.py lxml/branch/lxml-2.0/src/lxml/xinclude.pxi lxml/branch/lxml-2.0/src/lxml/xmlerror.pxi lxml/branch/lxml-2.0/src/lxml/xmlid.pxi lxml/branch/lxml-2.0/src/lxml/xmlschema.pxi lxml/branch/lxml-2.0/src/lxml/xpath.pxi lxml/branch/lxml-2.0/src/lxml/xslt.pxi lxml/branch/lxml-2.0/version.txt Log: trunk merge Modified: lxml/branch/lxml-2.0/CHANGES.txt ============================================================================== --- lxml/branch/lxml-2.0/CHANGES.txt (original) +++ lxml/branch/lxml-2.0/CHANGES.txt Thu Feb 14 15:52:28 2008 @@ -2,6 +2,46 @@ lxml changelog ============== +2.0.1 (2008-02-13) +================== + +Features added +-------------- + +* Child iteration in ``lxml.pyclasslookup``. + +* Loads of new docstrings reflect the signature of functions and + methods to make them visible in API docs and ``help()`` + +Bugs fixed +---------- + +* The module ``lxml.html.builder`` was duplicated as + ``lxml.htmlbuilder`` + +* Form elements would return None for ``form.fields.keys()`` if there + was an unnamed input field. Now unnamed input fields are completely + ignored. + +* Setting an element slice in objectify could insert slice-overlapping + elements at the wrong position. + +Other changes +------------- + +* The generated API documentation was cleaned up and disburdened from + non-public classes etc. + +* The previously public module ``lxml.html.setmixin`` was renamed to + ``lxml.html._setmixin`` as it is not an official part of lxml. If + you want to use it, feel free to copy it over to your own source + base. + +* Passing ``--with-xslt-config=/path/to/xslt-config`` to setup.py will + override the ``xslt-config`` script that is used to determine the C + compiler options. 
+ + 2.0 (2008-02-01) ================ Modified: lxml/branch/lxml-2.0/Makefile ============================================================================== --- lxml/branch/lxml-2.0/Makefile (original) +++ lxml/branch/lxml-2.0/Makefile Thu Feb 14 15:52:28 2008 @@ -42,10 +42,11 @@ rm -fr doc/html/api @[ -x "`which epydoc`" ] \ && (cd src && echo "Generating API docs ..." && \ - PYTHONPATH=. epydoc -v -o ../doc/html/api --name lxml --url http://codespeak.net/lxml/ lxml/) \ + PYTHONPATH=. epydoc -v --docformat "restructuredtext en" \ + -o ../doc/html/api --no-private --exclude='[.]html[.]tests|[.]_' \ + --name lxml --url http://codespeak.net/lxml/ lxml/) \ || (echo "not generating epydoc API documentation") -# XXX What should the default be? test: test_inplace valtest: valgrind_test_inplace Modified: lxml/branch/lxml-2.0/doc/FAQ.txt ============================================================================== --- lxml/branch/lxml-2.0/doc/FAQ.txt (original) +++ lxml/branch/lxml-2.0/doc/FAQ.txt Thu Feb 14 15:52:28 2008 @@ -57,19 +57,23 @@ Is there a tutorial? -------------------- -Read the `lxml.etree Tutorial`_. While this is still work in progress (just -as any good documentation), it provides an overview of the most important -concepts in ``lxml.etree``. If you want to help out, the tutorial is a very -good place to start. +Read the `lxml.etree Tutorial`_. While this is still work in progress +(just as any good documentation), it provides an overview of the most +important concepts in ``lxml.etree``. If you want to help out, +improving the tutorial is a very good place to start. There is also a `tutorial for ElementTree`_ which works for ``lxml.etree``. The `API documentation`_ also contains many examples for ``lxml.etree``. To learn using ``lxml.objectify``, read the `objectify documentation`_. +John Shipman has written another tutorial called `Python XML +processing with lxml`_ that contains lots of examples. + .. _`lxml.etree Tutorial`: tutorial.html .. 
_`tutorial for ElementTree`: http://effbot.org/zone/element.htm .. _`API documentation`: api.html .. _`objectify documentation`: objectify.html +.. _`Python XML processing with lxml`: http://www.nmt.edu/tcc/help/pubs/pylxml/ Where can I find more documentation about lxml? @@ -116,10 +120,11 @@ Also note that the compatibility to the ElementTree library does not require projects to set a hard dependency on lxml - as long as they do -not need lxml's enhanced feature set. +not take advantage of lxml's enhanced feature set. * cssutils_, a CSS parser and toolkit, can be used with ``lxml.cssselect`` * Deliverance_, a content theming tool +* `Enfold Proxy 4`_, a web server accelerator with on-the-fly XSLT processing * Inteproxy_, a secure HTTP proxy * lwebstring_, an XML template engine * OpenXMLlib_, a library for handling OpenXML document meta data @@ -136,6 +141,7 @@ .. _cssutils: http://code.google.com/p/cssutils/source/browse/trunk/examples/style.py?r=917 .. _Deliverance: http://www.openplans.org/projects/deliverance/project-home +.. _`Enfold Proxy 4`: http://www.enfoldsystems.com/Products/Proxy/4 .. _gocept.lxml: http://pypi.python.org/pypi/gocept.lxml .. _Inteproxy: http://lists.wald.intevation.org/pipermail/inteproxy-devel/2007-February/000000.html .. _lwebstring: http://pypi.python.org/pypi/lwebstring Modified: lxml/branch/lxml-2.0/doc/build.txt ============================================================================== --- lxml/branch/lxml-2.0/doc/build.txt (original) +++ lxml/branch/lxml-2.0/doc/build.txt Thu Feb 14 15:52:28 2008 @@ -23,22 +23,31 @@ Cython ------ -The lxml.etree and lxml.objectify modules are written in Cython_. Since we -distribute the Cython-generated .c files with lxml releases, however, you do -not need Cython to build lxml from the normal release sources. - +.. _EasyInstall: http://peak.telecommunity.com/DevCenter/EasyInstall .. 
_Cython: http://www.cython.org -If you are interested in building lxml from a Subversion checkout or want to -be an lxml developer, you do need a working Cython installation. You can use -EasyInstall_ to install it:: +The lxml.etree and lxml.objectify modules are written in Cython_. +Since we distribute the Cython-generated .c files with lxml releases, +however, you do not need Cython to build lxml from the normal release +sources. We even encourage you to *not install Cython* for a normal +release build, as the generated C code can vary quite heavily between +Cython versions, which may or may not generate correct code for lxml. +The pre-generated release sources were tested and therefore are known +to work. + +So, if you want a reliable build of lxml, we suggest to a) use a +source release of lxml and b) disable or uninstall Cython for the +build. + +*Only* if you are interested in building lxml from a Subversion +checkout (e.g. to test a bug fix that has not been released yet) or if +you want to be an lxml developer, then you do need a working Cython +installation. You can use EasyInstall_ to install it:: - easy_install Cython==0.9.6.11 - -.. _EasyInstall: http://peak.telecommunity.com/DevCenter/EasyInstall + easy_install Cython==0.9.6.11b -lxml currently requires at least Cython 0.9.6.11, but later versions -should work. +lxml currently requires Cython 0.9.6.11b, later versions were not +tested. Subversion @@ -79,9 +88,13 @@ If you get errors about missing header files (e.g. ``libxml/xmlversion.h``) then you need to make sure the development packages of both libxml2 -and libxslt are properly installed. If this doesn't help, you may -have to add the location of the header files to the include path -like:: +and libxslt are properly installed. 
Try passing the following option to +setup.py to make sure the right config is found:: + + python setup.py build --with-xslt-config=/path/to/xslt-config + +If this doesn't help, you may have to add the location of the header +files to the include path like:: python setup.py build_ext -i -I /usr/include/libxml2 @@ -165,15 +178,28 @@ an older version. The result can be segfaults on this platform that are hard to track down. -To make sure the newer libxml2 and libxslt versions are used (e.g. under -fink), you should add the directory where you installed the libraries to the -``DYLD_LIBRARY_PATH`` environment variable. This seems to fix a lot of -problems for users. - -Alternatively, you can build lxml statically. A way to do this on MS Windows -is described in the next section, but it should be easy to adapt it for -Mac-OS. That way, you can always be sure you use the versions you compiled -lxml with, regardless of the runtime environement. +To make sure the newer libxml2 and libxslt versions (e.g. those +provided by fink or macports) are used at *build time*, you must take +care that the script ``xslt-config`` is found from the newly installed +version when running the build setup. The system libraries also +provide this script, but the new one must come first in the PATH. The +best way to make sure the right version is used is by passing the path +to the script as an option to setup.py:: + + python setup.py build --with-xslt-config=/path/to/xslt-config + +To make sure the newer libxml2 and libxslt versions are used at +*runtime*, you should add *all* directories where the newer libraries +are installed (i.e. libxml2, libxslt and libexslt) to the +``DYLD_LIBRARY_PATH`` environment variable when you use lxml (i.e. not +only at build time). This seems to fix a lot of problems for users. + +Please read this thread about `experiences with MacOS-X`_ if you +encounter problems. It also has a `buildout for lxml`_ that you can +use. + +.. 
_`experiences with MacOS-X`: http://thread.gmane.org/gmane.comp.python.lxml.devel/3290/focus=3290 +.. _`buildout for lxml`: http://thread.gmane.org/gmane.comp.python.lxml.devel/3290/focus=3297 Static linking on Windows Modified: lxml/branch/lxml-2.0/doc/main.txt ============================================================================== --- lxml/branch/lxml-2.0/doc/main.txt (original) +++ lxml/branch/lxml-2.0/doc/main.txt Thu Feb 14 15:52:28 2008 @@ -145,8 +145,8 @@ .. _`lxml at the Python Package Index`: http://pypi.python.org/pypi/lxml/ .. _`this key`: pubkey.asc -The latest version is `lxml 2.0`_, released 2008-02-01 -(`changes for 2.0`_). `Older versions`_ are listed below. +The latest version is `lxml 2.0.1`_, released 2008-02-13 +(`changes for 2.0.1`_). `Older versions`_ are listed below. .. _`Older versions`: #old-versions @@ -206,21 +206,7 @@ Old Versions ------------ -* `lxml 2.0beta2`_, released 2008-01-26 (`changes for 2.0beta2`_) - -* `lxml 2.0beta1`_, released 2008-01-11 (`changes for 2.0beta1`_) - -* `lxml 2.0alpha6`_, released 2007-12-19 (`changes for 2.0alpha6`_) - -* `lxml 2.0alpha5`_, released 2007-11-24 (`changes for 2.0alpha5`_) - -* `lxml 2.0alpha4`_, released 2007-10-07 (`changes for 2.0alpha4`_) - -* `lxml 2.0alpha3`_, released 2007-09-26 (`changes for 2.0alpha3`_) - -* `lxml 2.0alpha2`_, released 2007-09-15 (`changes for 2.0alpha2`_) - -* `lxml 2.0alpha1`_, released 2007-09-02 (`changes for 2.0alpha1`_) +* `lxml 2.0`_, released 2008-02-01 (`changes for 2.0`_) * `lxml 1.3.6`_, released 2007-10-29 (`changes for 1.3.6`_) @@ -272,15 +258,8 @@ * `lxml 0.5`_, released 2005-04-08 +.. _`lxml 2.0.1`: lxml-2.0.1.tgz .. _`lxml 2.0`: lxml-2.0.tgz -.. _`lxml 2.0beta2`: lxml-2.0beta2.tgz -.. _`lxml 2.0beta1`: lxml-2.0beta1.tgz -.. _`lxml 2.0alpha6`: lxml-2.0alpha6.tgz -.. _`lxml 2.0alpha5`: lxml-2.0alpha5.tgz -.. _`lxml 2.0alpha4`: lxml-2.0alpha4.tgz -.. _`lxml 2.0alpha3`: lxml-2.0alpha3.tgz -.. _`lxml 2.0alpha2`: lxml-2.0alpha2.tgz -.. 
_`lxml 2.0alpha1`: lxml-2.0alpha1.tgz .. _`lxml 1.3.6`: lxml-1.3.6.tgz .. _`lxml 1.3.5`: lxml-1.3.5.tgz .. _`lxml 1.3.4`: lxml-1.3.4.tgz @@ -306,15 +285,8 @@ .. _`lxml 0.5.1`: lxml-0.5.1.tgz .. _`lxml 0.5`: lxml-0.5.tgz +.. _`changes for 2.0.1`: changes-2.0.1.html .. _`changes for 2.0`: changes-2.0.html -.. _`changes for 2.0beta2`: changes-2.0beta2.html -.. _`changes for 2.0beta1`: changes-2.0beta1.html -.. _`changes for 2.0alpha6`: changes-2.0alpha6.html -.. _`changes for 2.0alpha5`: changes-2.0alpha5.html -.. _`changes for 2.0alpha4`: changes-2.0alpha4.html -.. _`changes for 2.0alpha3`: changes-2.0alpha3.html -.. _`changes for 2.0alpha2`: changes-2.0alpha2.html -.. _`changes for 2.0alpha1`: changes-2.0alpha1.html .. _`changes for 1.3.6`: changes-1.3.6.html .. _`changes for 1.3.5`: changes-1.3.5.html .. _`changes for 1.3.4`: changes-1.3.4.html Modified: lxml/branch/lxml-2.0/doc/performance.txt ============================================================================== --- lxml/branch/lxml-2.0/doc/performance.txt (original) +++ lxml/branch/lxml-2.0/doc/performance.txt Thu Feb 14 15:52:28 2008 @@ -71,8 +71,8 @@ a specific part of the API yourself, please consider sending it to the lxml mailing list. -The timings cited below compare lxml 2.0alpha (with libxml2 2.6.30) to -the December 2007 SVN trunk versions of ElementTree (1.3) and +The timings cited below compare lxml 2.0 final (with libxml2 2.6.31) +to the January 2008 SVN trunk versions of ElementTree (1.3alpha) and cElementTree (1.2.7). They were run single-threaded on a 1.8GHz Intel Core Duo machine under Ubuntu Linux 7.10 (Gutsy). The C libraries were compiled with the same platform specific optimisation flags. 
The @@ -117,23 +117,23 @@ 1.2, lxml is still more than 5 times as fast as the much improved ElementTree 1.3:: - lxe: tostring_utf16 (SATR T1) 23.4821 msec/pass + lxe: tostring_utf16 (SATR T1) 19.0921 msec/pass cET: tostring_utf16 (SATR T1) 129.8430 msec/pass ET : tostring_utf16 (SATR T1) 136.1301 msec/pass - lxe: tostring_utf16 (UATR T1) 23.4859 msec/pass + lxe: tostring_utf16 (UATR T1) 20.4630 msec/pass cET: tostring_utf16 (UATR T1) 130.1570 msec/pass ET : tostring_utf16 (UATR T1) 136.3101 msec/pass - lxe: tostring_utf16 (S-TR T2) 24.2729 msec/pass + lxe: tostring_utf16 (S-TR T2) 18.8632 msec/pass cET: tostring_utf16 (S-TR T2) 136.9388 msec/pass ET : tostring_utf16 (S-TR T2) 143.9550 msec/pass - lxe: tostring_utf8 (S-TR T2) 18.4860 msec/pass + lxe: tostring_utf8 (S-TR T2) 14.4310 msec/pass cET: tostring_utf8 (S-TR T2) 137.0859 msec/pass ET : tostring_utf8 (S-TR T2) 144.3110 msec/pass - lxe: tostring_utf8 (U-TR T3) 2.7399 msec/pass + lxe: tostring_utf8 (U-TR T3) 2.6381 msec/pass cET: tostring_utf8 (U-TR T3) 52.1040 msec/pass ET : tostring_utf8 (U-TR T3) 53.1070 msec/pass @@ -205,10 +205,10 @@ (given in seconds):: lxe: -- S- U- -A SA UA - T1: 0.0914 0.0875 0.0872 0.0892 0.0882 0.0900 - T2: 0.0894 0.0897 0.0892 0.0988 0.0978 0.0974 - T3: 0.0219 0.0194 0.0189 0.0570 0.0570 0.0573 - T4: 0.0004 0.0003 0.0003 0.0012 0.0012 0.0012 + T1: 0.0783 0.0777 0.0774 0.0787 0.0781 0.0783 + T2: 0.0799 0.0796 0.0799 0.0879 0.0882 0.0886 + T3: 0.0245 0.0216 0.0217 0.0577 0.0575 0.0572 + T4: 0.0003 0.0003 0.0003 0.0011 0.0011 0.0011 cET: -- S- U- -A SA UA T1: 0.0272 0.0264 0.0267 0.0268 0.0261 0.0265 T2: 0.0280 0.0274 0.0273 0.0273 0.0276 0.0275 @@ -235,21 +235,21 @@ create a shallow copy of their list of children, lxml has to create a Python object for each child and collect them in a list:: - lxe: root_list_children (--TR T1) 0.0169 msec/pass + lxe: root_list_children (--TR T1) 0.0160 msec/pass cET: root_list_children (--TR T1) 0.0081 msec/pass ET : root_list_children (--TR T1) 
0.0541 msec/pass - lxe: root_list_children (--TR T2) 0.2339 msec/pass + lxe: root_list_children (--TR T2) 0.2100 msec/pass cET: root_list_children (--TR T2) 0.0319 msec/pass ET : root_list_children (--TR T2) 0.4420 msec/pass This handicap is also visible when accessing single children:: - lxe: first_child (--TR T2) 0.2470 msec/pass + lxe: first_child (--TR T2) 0.2429 msec/pass cET: first_child (--TR T2) 0.2170 msec/pass ET : first_child (--TR T2) 0.9968 msec/pass - lxe: last_child (--TR T1) 0.2482 msec/pass + lxe: last_child (--TR T1) 0.2470 msec/pass cET: last_child (--TR T1) 0.2291 msec/pass ET : last_child (--TR T1) 0.9830 msec/pass @@ -258,11 +258,11 @@ The data structure used by libxml2 is a linked tree, and thus, a linked list of children:: - lxe: middle_child (--TR T1) 0.2789 msec/pass + lxe: middle_child (--TR T1) 0.2759 msec/pass cET: middle_child (--TR T1) 0.2229 msec/pass ET : middle_child (--TR T1) 1.0030 msec/pass - lxe: middle_child (--TR T2) 1.9610 msec/pass + lxe: middle_child (--TR T2) 1.7071 msec/pass cET: middle_child (--TR T2) 0.2229 msec/pass ET : middle_child (--TR T2) 0.9930 msec/pass @@ -274,7 +274,7 @@ in. 
This results in a major performance difference for creating independent Elements that end up in independently created documents:: - lxe: create_elements (--TC T2) 3.1691 msec/pass + lxe: create_elements (--TC T2) 2.8961 msec/pass cET: create_elements (--TC T2) 0.1929 msec/pass ET : create_elements (--TC T2) 1.3590 msec/pass @@ -282,11 +282,11 @@ are supposed to end up in, either as SubElements of an Element or using the explicit ``Element.makeelement()`` call:: - lxe: makeelement (--TC T2) 2.2650 msec/pass + lxe: makeelement (--TC T2) 1.9000 msec/pass cET: makeelement (--TC T2) 0.3211 msec/pass ET : makeelement (--TC T2) 1.6358 msec/pass - lxe: create_subelements (--TC T2) 1.9531 msec/pass + lxe: create_subelements (--TC T2) 1.7891 msec/pass cET: create_subelements (--TC T2) 0.2351 msec/pass ET : create_subelements (--TC T2) 3.2270 msec/pass @@ -305,11 +305,11 @@ The following benchmark appends all root children of the second tree to the root of the first tree:: - lxe: append_from_document (--TR T1,T2) 3.8681 msec/pass + lxe: append_from_document (--TR T1,T2) 3.3841 msec/pass cET: append_from_document (--TR T1,T2) 0.2699 msec/pass ET : append_from_document (--TR T1,T2) 1.2650 msec/pass - lxe: append_from_document (--TR T3,T4) 0.0570 msec/pass + lxe: append_from_document (--TR T3,T4) 0.0441 msec/pass cET: append_from_document (--TR T3,T4) 0.0169 msec/pass ET : append_from_document (--TR T3,T4) 0.0820 msec/pass @@ -322,20 +322,20 @@ This difference is not always as visible, but applies to most parts of the API, like inserting newly created elements:: - lxe: insert_from_document (--TR T1,T2) 5.8019 msec/pass + lxe: insert_from_document (--TR T1,T2) 5.7020 msec/pass cET: insert_from_document (--TR T1,T2) 0.4041 msec/pass ET : insert_from_document (--TR T1,T2) 1.4789 msec/pass or replacing the child slice by a newly created element:: - lxe: replace_children_element (--TC T1) 0.2480 msec/pass + lxe: replace_children_element (--TC T1) 0.2210 msec/pass cET: 
replace_children_element (--TC T1) 0.0238 msec/pass ET : replace_children_element (--TC T1) 0.1600 msec/pass as opposed to replacing the slice with an existing element from the same document:: - lxe: replace_children (--TC T1) 0.0188 msec/pass + lxe: replace_children (--TC T1) 0.0179 msec/pass cET: replace_children (--TC T1) 0.0119 msec/pass ET : replace_children (--TC T1) 0.0739 msec/pass @@ -347,16 +347,16 @@ Deep copying a tree is fast in lxml:: - lxe: deepcopy_all (--TR T1) 10.9420 msec/pass + lxe: deepcopy_all (--TR T1) 9.7558 msec/pass cET: deepcopy_all (--TR T1) 120.6188 msec/pass ET : deepcopy_all (--TR T1) 902.6880 msec/pass - lxe: deepcopy_all (-ATR T2) 12.5830 msec/pass + lxe: deepcopy_all (-ATR T2) 12.3210 msec/pass cET: deepcopy_all (-ATR T2) 136.9810 msec/pass ET : deepcopy_all (-ATR T2) 944.2801 msec/pass - lxe: deepcopy_all (S-TR T3) 4.1170 msec/pass - cET: deepcopy_all (S-TR T3) 36.1221 msec/pass + lxe: deepcopy_all (S-TR T3) 8.3981 msec/pass + cET: deepcopy_all (S-TR T3) 35.6541 msec/pass ET : deepcopy_all (S-TR T3) 221.6041 msec/pass So, for example, if you have a database-like scenario where you parse in a @@ -372,37 +372,37 @@ especially if few elements are of interest or the target element tag name is known, lxml is a good choice:: - lxe: getiterator_all (--TR T1) 5.8582 msec/pass + lxe: getiterator_all (--TR T1) 5.7251 msec/pass cET: getiterator_all (--TR T1) 39.9489 msec/pass ET : getiterator_all (--TR T1) 23.0000 msec/pass - lxe: getiterator_islice (--TR T2) 0.0780 msec/pass + lxe: getiterator_islice (--TR T2) 0.0830 msec/pass cET: getiterator_islice (--TR T2) 0.3440 msec/pass ET : getiterator_islice (--TR T2) 0.2429 msec/pass - lxe: getiterator_tag (--TR T2) 0.3119 msec/pass + lxe: getiterator_tag (--TR T2) 0.3011 msec/pass cET: getiterator_tag (--TR T2) 14.1001 msec/pass ET : getiterator_tag (--TR T2) 7.4241 msec/pass - lxe: getiterator_tag_all (--TR T2) 0.6540 msec/pass + lxe: getiterator_tag_all (--TR T2) 0.6340 msec/pass cET: 
getiterator_tag_all (--TR T2) 40.7901 msec/pass ET : getiterator_tag_all (--TR T2) 21.0390 msec/pass This translates directly into similar timings for ``Element.findall()``:: - lxe: findall (--TR T2) 8.1239 msec/pass + lxe: findall (--TR T2) 7.8950 msec/pass cET: findall (--TR T2) 44.5340 msec/pass ET : findall (--TR T2) 27.1149 msec/pass - lxe: findall (--TR T3) 1.6870 msec/pass + lxe: findall (--TR T3) 1.7281 msec/pass cET: findall (--TR T3) 12.9611 msec/pass ET : findall (--TR T3) 8.6131 msec/pass - lxe: findall_tag (--TR T2) 0.7660 msec/pass + lxe: findall_tag (--TR T2) 0.7720 msec/pass cET: findall_tag (--TR T2) 40.6358 msec/pass ET : findall_tag (--TR T2) 21.4581 msec/pass - lxe: findall_tag (--TR T3) 0.2160 msec/pass + lxe: findall_tag (--TR T3) 0.2050 msec/pass cET: findall_tag (--TR T3) 9.6831 msec/pass ET : findall_tag (--TR T3) 5.2109 msec/pass @@ -420,38 +420,38 @@ of the lxml API you use. The most straight forward way is to call the ``xpath()`` method on an Element or ElementTree:: - lxe: xpath_method (--TC T1) 1.8251 msec/pass - lxe: xpath_method (--TC T2) 23.3159 msec/pass - lxe: xpath_method (--TC T3) 0.1378 msec/pass - lxe: xpath_method (--TC T4) 1.1270 msec/pass + lxe: xpath_method (--TC T1) 1.7459 msec/pass + lxe: xpath_method (--TC T2) 22.0850 msec/pass + lxe: xpath_method (--TC T3) 0.1309 msec/pass + lxe: xpath_method (--TC T4) 1.0772 msec/pass This is well suited for testing and when the XPath expressions are as diverse as the trees they are called on. 
However, if you have a single XPath expression that you want to apply to a larger number of different elements, the ``XPath`` class is the most efficient way to do it:: - lxe: xpath_class (--TC T1) 0.6981 msec/pass - lxe: xpath_class (--TC T2) 3.6111 msec/pass - lxe: xpath_class (--TC T3) 0.0591 msec/pass - lxe: xpath_class (--TC T4) 0.1979 msec/pass + lxe: xpath_class (--TC T1) 0.6740 msec/pass + lxe: xpath_class (--TC T2) 3.1760 msec/pass + lxe: xpath_class (--TC T3) 0.0548 msec/pass + lxe: xpath_class (--TC T4) 0.1700 msec/pass Note that this still allows you to use variables in the expression, so you can parse it once and then adapt it through variables at call time. In other cases, where you have a fixed Element or ElementTree and want to run different expressions on it, you should consider the ``XPathEvaluator``:: - lxe: xpath_element (--TR T1) 0.4342 msec/pass - lxe: xpath_element (--TR T2) 11.9958 msec/pass - lxe: xpath_element (--TR T3) 0.1690 msec/pass - lxe: xpath_element (--TR T4) 0.3510 msec/pass + lxe: xpath_element (--TR T1) 0.4151 msec/pass + lxe: xpath_element (--TR T2) 11.6129 msec/pass + lxe: xpath_element (--TR T3) 0.1299 msec/pass + lxe: xpath_element (--TR T4) 0.3409 msec/pass While it looks slightly slower, creating an XPath object for each of the expressions generates a much higher overhead here:: - lxe: xpath_class_repeat (--TC T1) 1.7619 msec/pass - lxe: xpath_class_repeat (--TC T2) 21.9102 msec/pass - lxe: xpath_class_repeat (--TC T3) 0.1330 msec/pass - lxe: xpath_class_repeat (--TC T4) 1.0631 msec/pass + lxe: xpath_class_repeat (--TC T1) 1.6699 msec/pass + lxe: xpath_class_repeat (--TC T2) 20.4420 msec/pass + lxe: xpath_class_repeat (--TC T3) 0.1230 msec/pass + lxe: xpath_class_repeat (--TC T4) 0.9859 msec/pass A longer example @@ -608,21 +608,21 @@ tree. 
It avoids step-by-step Python element instantiations along the path, which can substantially improve the access time:: - lxe: attribute (--TR T1) 9.8128 msec/pass - lxe: attribute (--TR T2) 53.2899 msec/pass - lxe: attribute (--TR T4) 9.6800 msec/pass - - lxe: objectpath (--TR T1) 5.4898 msec/pass - lxe: objectpath (--TR T2) 48.4819 msec/pass - lxe: objectpath (--TR T4) 5.3761 msec/pass - - lxe: attributes_deep (--TR T1) 56.3290 msec/pass - lxe: attributes_deep (--TR T2) 62.4361 msec/pass - lxe: attributes_deep (--TR T4) 15.8000 msec/pass - - lxe: objectpath_deep (--TR T1) 49.0060 msec/pass - lxe: objectpath_deep (--TR T2) 52.5169 msec/pass - lxe: objectpath_deep (--TR T4) 7.1371 msec/pass + lxe: attribute (--TR T1) 9.4581 msec/pass + lxe: attribute (--TR T2) 52.5560 msec/pass + lxe: attribute (--TR T4) 9.1729 msec/pass + + lxe: objectpath (--TR T1) 4.8690 msec/pass + lxe: objectpath (--TR T2) 47.8780 msec/pass + lxe: objectpath (--TR T4) 4.7870 msec/pass + + lxe: attributes_deep (--TR T1) 54.7471 msec/pass + lxe: attributes_deep (--TR T2) 62.7451 msec/pass + lxe: attributes_deep (--TR T4) 15.1050 msec/pass + + lxe: objectpath_deep (--TR T1) 48.2810 msec/pass + lxe: objectpath_deep (--TR T2) 51.3949 msec/pass + lxe: objectpath_deep (--TR T4) 6.1419 msec/pass Note, however, that parsing ObjectPath expressions is not for free either, so this is most effective for frequently accessing the same element. 
@@ -648,17 +648,17 @@ subtrees and elements) to cache, you can trade memory usage against access speed:: - lxe: attribute_cached (--TR T1) 7.6170 msec/pass - lxe: attribute_cached (--TR T2) 50.7941 msec/pass - lxe: attribute_cached (--TR T4) 7.4880 msec/pass - - lxe: attributes_deep_cached (--TR T1) 49.9220 msec/pass - lxe: attributes_deep_cached (--TR T2) 55.9340 msec/pass - lxe: attributes_deep_cached (--TR T4) 10.0131 msec/pass - - lxe: objectpath_deep_cached (--TR T1) 44.9121 msec/pass - lxe: objectpath_deep_cached (--TR T2) 48.2371 msec/pass - lxe: objectpath_deep_cached (--TR T4) 3.9630 msec/pass + lxe: attribute_cached (--TR T1) 7.5061 msec/pass + lxe: attribute_cached (--TR T2) 50.1881 msec/pass + lxe: attribute_cached (--TR T4) 7.4170 msec/pass + + lxe: attributes_deep_cached (--TR T1) 48.7239 msec/pass + lxe: attributes_deep_cached (--TR T2) 55.2199 msec/pass + lxe: attributes_deep_cached (--TR T4) 9.9740 msec/pass + + lxe: objectpath_deep_cached (--TR T1) 43.4160 msec/pass + lxe: objectpath_deep_cached (--TR T2) 47.6480 msec/pass + lxe: objectpath_deep_cached (--TR T4) 3.4680 msec/pass Things to note: you cannot currently use ``weakref.WeakKeyDictionary`` objects for this as lxml's element objects do not support weak references (which are Modified: lxml/branch/lxml-2.0/setup.py ============================================================================== --- lxml/branch/lxml-2.0/setup.py (original) +++ lxml/branch/lxml-2.0/setup.py Thu Feb 14 15:52:28 2008 @@ -76,7 +76,8 @@ It extends the ElementTree API significantly to offer support for XPath, RelaxNG, XML Schema, XSLT, C14N and much more. 
-To contact the project, go to the project home page or see our bug tracker at +To contact the project, go to the `project home page +<http://codespeak.net/lxml>`_ or see our bug tracker at https://launchpad.net/lxml In case you want to use the current in-development version of lxml, you can Modified: lxml/branch/lxml-2.0/setupinfo.py ============================================================================== --- lxml/branch/lxml-2.0/setupinfo.py (original) +++ lxml/branch/lxml-2.0/setupinfo.py Thu Feb 14 15:52:28 2008 @@ -81,7 +81,7 @@ return static_library_dirs # filter them from xslt-config --libs result = [] - possible_library_dirs = flags('xslt-config --libs') + possible_library_dirs = flags('libs') for possible_library_dir in possible_library_dirs: if possible_library_dir.startswith('-L'): result.append(possible_library_dir[2:]) @@ -95,7 +95,7 @@ return static_include_dirs # filter them from xslt-config --cflags result = [] - possible_include_dirs = flags('xslt-config --cflags') + possible_include_dirs = flags('cflags') for possible_include_dir in possible_include_dirs: if possible_include_dir.startswith('-I'): result.append(possible_include_dir[2:]) @@ -114,7 +114,7 @@ return result # anything from xslt-config --cflags that doesn't start with -I - possible_cflags = flags('xslt-config --cflags') + possible_cflags = flags('cflags') for possible_cflag in possible_cflags: if not possible_cflag.startswith('-I'): result.append(possible_cflag) @@ -127,8 +127,9 @@ if OPTION_WITHOUT_THREADING: macros.append(('WITHOUT_THREADING', None)) return macros - -def flags(cmd): + +def flags(option): + cmd = "%s --%s" % (find_xslt_config(), option) try: import subprocess except ImportError: @@ -145,6 +146,22 @@ print("** make sure the development packages of libxml2 and libxslt are installed **\n") return str(rf.read()).split() +XSLT_CONFIG = None + +def find_xslt_config(): + global XSLT_CONFIG + if XSLT_CONFIG: + return XSLT_CONFIG + option = '--with-xslt-config=' + for arg in sys.argv: + if
arg.startswith(option): + sys.argv.remove(arg) + XSLT_CONFIG = arg[len(option):] + return XSLT_CONFIG + else: + XSLT_CONFIG = 'xslt-config' + return XSLT_CONFIG + def has_option(name): try: sys.argv.remove('--%s' % name) Modified: lxml/branch/lxml-2.0/src/lxml/ElementInclude.py ============================================================================== --- lxml/branch/lxml-2.0/src/lxml/ElementInclude.py (original) +++ lxml/branch/lxml-2.0/src/lxml/ElementInclude.py Thu Feb 14 15:52:28 2008 @@ -41,9 +41,14 @@ # OF THIS SOFTWARE. # -------------------------------------------------------------------- -## -# Limited XInclude support for the ElementTree package. -## +""" +Limited XInclude support for the ElementTree package. + +While lxml.etree has full support for XInclude (see +`etree.ElementTree.xinclude()`), this module provides a simpler, pure +Python, ElementTree compatible implementation that supports a simple +form of custom URL resolvers. +""" import copy, etree from urlparse import urljoin Modified: lxml/branch/lxml-2.0/src/lxml/builder.py ============================================================================== --- lxml/branch/lxml-2.0/src/lxml/builder.py (original) +++ lxml/branch/lxml-2.0/src/lxml/builder.py Thu Feb 14 15:52:28 2008 @@ -33,6 +33,10 @@ # OF THIS SOFTWARE. # -------------------------------------------------------------------- +""" +The ``E`` Element factory for generating XML documents. +""" + import etree as ET try: Modified: lxml/branch/lxml-2.0/src/lxml/classlookup.pxi ============================================================================== --- lxml/branch/lxml-2.0/src/lxml/classlookup.pxi (original) +++ lxml/branch/lxml-2.0/src/lxml/classlookup.pxi Thu Feb 14 15:52:28 2008 @@ -53,7 +53,9 @@ # class to store element class lookup functions cdef public class ElementClassLookup [ type LxmlElementClassLookupType, object LxmlElementClassLookup ]: - """Superclass of Element class lookups. 
+ """ElementClassLookup(self) + + Superclass of Element class lookups. """ cdef _element_class_lookup_function _lookup_function def __init__(self): @@ -62,18 +64,20 @@ cdef public class FallbackElementClassLookup(ElementClassLookup) \ [ type LxmlFallbackElementClassLookupType, object LxmlFallbackElementClassLookup ]: - """Superclass of Element class lookups with additional fallback. + """FallbackElementClassLookup(self, fallback=None) + + Superclass of Element class lookups with additional fallback. """ cdef readonly ElementClassLookup fallback cdef _element_class_lookup_function _fallback_function def __init__(self, ElementClassLookup fallback=None): self._lookup_function = NULL # use default lookup if fallback is not None: - self.setFallback(fallback) + self._setFallback(fallback) else: self._fallback_function = _lookupDefaultElementClass - def setFallback(self, ElementClassLookup lookup not None): + cdef void _setFallback(self, ElementClassLookup lookup): """Sets the fallback scheme for this lookup method. """ self.fallback = lookup @@ -81,6 +85,20 @@ if self._fallback_function is NULL: self._fallback_function = _lookupDefaultElementClass + def set_fallback(self, ElementClassLookup lookup not None): + """set_fallback(self, lookup) + + Sets the fallback scheme for this lookup method. + """ + self._setFallback(lookup) + + def setFallback(self, ElementClassLookup lookup not None): + """Sets the fallback scheme for this lookup method. + + :deprecated: use ``set_fallback()`` instead. 
+ """ + self._setFallback(lookup) + cdef object _callFallback(self, _Document doc, xmlNode* c_node): return self._fallback_function(self.fallback, doc, c_node) @@ -89,7 +107,8 @@ # Custom Element class lookup schemes cdef class ElementDefaultClassLookup(ElementClassLookup): - """Element class lookup scheme that always returns the default Element + """ElementDefaultClassLookup(self, element=None, comment=None, pi=None, entity=None) + Element class lookup scheme that always returns the default Element class. The keyword arguments ``element``, ``comment``, ``pi`` and ``entity`` @@ -163,13 +182,14 @@ assert 0, "Unknown node type: %s" % c_node.type cdef class AttributeBasedElementClassLookup(FallbackElementClassLookup): - """Checks an attribute of an Element and looks up the value in a class - dictionary. + """AttributeBasedElementClassLookup(self, attribute_name, class_mapping, fallback=None) + Checks an attribute of an Element and looks up the value in a + class dictionary. Arguments: - * attribute name - '{ns}name' style string - * class mapping - Python dict mapping attribute values to Element classes - * fallback - optional fallback lookup mechanism + - attribute name - '{ns}name' style string + - class mapping - Python dict mapping attribute values to Element classes + - fallback - optional fallback lookup mechanism A None key in the class mapping will be checked if the attribute is missing. @@ -207,7 +227,8 @@ cdef class ParserBasedElementClassLookup(FallbackElementClassLookup): - """Element class lookup based on the XML parser. + """ParserBasedElementClassLookup(self, fallback=None) + Element class lookup based on the XML parser. """ def __init__(self, ElementClassLookup fallback=None): FallbackElementClassLookup.__init__(self, fallback) @@ -221,7 +242,8 @@ cdef class CustomElementClassLookup(FallbackElementClassLookup): - """Element class lookup based on a subclass method. 
+ """CustomElementClassLookup(self, fallback=None) + Element class lookup based on a subclass method. You can inherit from this class and override the method:: @@ -240,6 +262,7 @@ self._lookup_function = _custom_class_lookup def lookup(self, type, doc, namespace, name): + "lookup(self, type, doc, namespace, name)" return None cdef object _custom_class_lookup(state, _Document doc, xmlNode* c_node): @@ -291,11 +314,14 @@ LOOKUP_ELEMENT_CLASS = function def setElementClassLookup(ElementClassLookup lookup = None): - "@deprecated: use ``set_element_class_lookup(lookup)`` instead" + ":deprecated: use ``set_element_class_lookup(lookup)`` instead" set_element_class_lookup(lookup) def set_element_class_lookup(ElementClassLookup lookup = None): - "Set the global default element class lookup method." + """set_element_class_lookup(lookup = None) + + Set the global default element class lookup method. + """ if lookup is None or lookup._lookup_function is NULL: _setElementClassLookupFunction(NULL, None) else: Modified: lxml/branch/lxml-2.0/src/lxml/cssselect.py ============================================================================== --- lxml/branch/lxml-2.0/src/lxml/cssselect.py (original) +++ lxml/branch/lxml-2.0/src/lxml/cssselect.py Thu Feb 14 15:52:28 2008 @@ -1,3 +1,9 @@ +"""CSS Selectors based on XPath. + +This module supports selecting XML/HTML tags based on CSS selectors. +See the `CSSSelector` class for details. +""" + import re from lxml import etree @@ -11,7 +17,17 @@ pass class CSSSelector(etree.XPath): + """A CSS selector. + + Usage:: + >>> from lxml import etree, cssselect + >>> select = cssselect.CSSSelector("a tag > child") + + >>> root = etree.XML("<a><tag><child>TEXT</child></tag></a>") + >>> [ el.tag for el in select(root) ] + ['child'] + """ def __init__(self, css): path = css_to_xpath(css) etree.XPath.__init__(self, path) @@ -575,9 +591,8 @@ self.condition = other.condition class XPathExprOr(XPathExpr): - """ - Represents on |'d expressions.
Note that unfortunately it isn't + Represents |'d expressions. Note that unfortunately it isn't the union, it's the sum, so duplicate elements will appear. """ Modified: lxml/branch/lxml-2.0/src/lxml/docloader.pxi ============================================================================== --- lxml/branch/lxml-2.0/src/lxml/docloader.pxi (original) +++ lxml/branch/lxml-2.0/src/lxml/docloader.pxi Thu Feb 14 15:52:28 2008 @@ -15,7 +15,9 @@ cdef class Resolver: "This is the base class of all resolvers." def resolve(self, system_url, public_id, context): - """Override this method to resolve an external source by + """resolve(self, system_url, public_id, context) + + Override this method to resolve an external source by ``system_url`` and ``public_id``. The third argument is an opaque context object. @@ -24,7 +26,9 @@ return None def resolve_empty(self, context): - """Return an empty input document. + """resolve_empty(self, context) + + Return an empty input document. Pass context as parameter. """ @@ -34,7 +38,9 @@ return doc_ref def resolve_string(self, string, context, *, base_url=None): - """Return a parsable string as input document. + """resolve_string(self, string, context, base_url=None) + + Return a parsable string as input document. Pass data string and context as parameters. @@ -49,7 +55,9 @@ return doc_ref def resolve_filename(self, filename, context): - """Return the name of a parsable file as input document. + """resolve_filename(self, filename, context) + + Return the name of a parsable file as input document. Pass filename and context as parameters. """ @@ -60,7 +68,9 @@ return doc_ref def resolve_file(self, f, context): - """Return an open file-like object as input document. + """resolve_file(self, f, context) + + Return an open file-like object as input document. Pass open file and context as parameters. """ @@ -83,7 +93,9 @@ self._default_resolver = default_resolver def add(self, Resolver resolver not None): - """Register a resolver. 
+ """add(self, resolver) + + Register a resolver. For each requested entity, the 'resolve' method of the resolver will be called and the result will be passed to the parser. If this method @@ -94,6 +106,7 @@ self._resolvers.add(resolver) def remove(self, resolver): + "remove(self, resolver)" self._resolvers.discard(resolver) cdef _ResolverRegistry _copy(self): @@ -103,9 +116,11 @@ return registry def copy(self): + "copy(self)" return self._copy() def resolve(self, system_url, public_id, context): + "resolve(self, system_url, public_id, context)" for resolver in self._resolvers: result = resolver.resolve(system_url, public_id, context) if result is not None: Modified: lxml/branch/lxml-2.0/src/lxml/doctestcompare.py ============================================================================== --- lxml/branch/lxml-2.0/src/lxml/doctestcompare.py (original) +++ lxml/branch/lxml-2.0/src/lxml/doctestcompare.py Thu Feb 14 15:52:28 2008 @@ -1,8 +1,16 @@ """ lxml-based doctest output comparison. -To use this you must call ``lxmldoctest.install()``, which will cause -doctest to use this in all subsequent calls. +Note: normally, you should just import the `lxml.usedoctest` and +`lxml.html.usedoctest` modules from within a doctest, instead of this +one:: + + >>> import lxml.usedoctest # for XML output + + >>> import lxml.html.usedoctest # for HTML output + +To use this module directly, you must call ``lxmldoctest.install()``, +which will cause doctest to use this in all subsequent calls. This changes the way output is checked and comparisons are made for XML or HTML-like content. 
@@ -32,7 +40,7 @@ import doctest import cgi -__all__ = ['PARSE_HTML', 'PARSE_XML', 'LXMLOutputChecker', +__all__ = ['PARSE_HTML', 'PARSE_XML', 'NOPARSE_MARKUP', 'LXMLOutputChecker', 'LHTMLOutputChecker', 'install', 'temp_install'] PARSE_HTML = doctest.register_optionflag('PARSE_HTML') Modified: lxml/branch/lxml-2.0/src/lxml/dtd.pxi ============================================================================== --- lxml/branch/lxml-2.0/src/lxml/dtd.pxi (original) +++ lxml/branch/lxml-2.0/src/lxml/dtd.pxi Thu Feb 14 15:52:28 2008 @@ -20,7 +20,8 @@ # DTD cdef class DTD(_Validator): - """A DTD validator. + """DTD(self, file=None, external_id=None) + A DTD validator. Can load from filesystem directly given a filename or file-like object. Alternatively, pass the keyword parameter ``external_id`` to load from a @@ -56,7 +57,9 @@ tree.xmlFreeDtd(self._c_dtd) def __call__(self, etree): - """Validate doc using the DTD. + """__call__(self, etree) + + Validate doc using the DTD. Returns true if the document is valid, false if not. """ Modified: lxml/branch/lxml-2.0/src/lxml/extensions.pxi ============================================================================== --- lxml/branch/lxml-2.0/src/lxml/extensions.pxi (original) +++ lxml/branch/lxml-2.0/src/lxml/extensions.pxi Thu Feb 14 15:52:28 2008 @@ -326,7 +326,9 @@ self._temp_refs.add((<_Element>o)._doc) def Extension(module, function_mapping=None, *, ns=None): - """Build a dictionary of extension functions from the functions + """Extension(module, function_mapping=None, ns=None) + + Build a dictionary of extension functions from the functions defined in a module or the methods of an object. 
As second argument, you can pass an additional mapping of Modified: lxml/branch/lxml-2.0/src/lxml/html/__init__.py ============================================================================== --- lxml/branch/lxml-2.0/src/lxml/html/__init__.py (original) +++ lxml/branch/lxml-2.0/src/lxml/html/__init__.py Thu Feb 14 15:52:28 2008 @@ -1,3 +1,6 @@ +"""The ``lxml.html`` tool set for HTML handling. +""" + import threading import re import urlparse @@ -5,7 +8,7 @@ from lxml import etree from lxml.html import defs from lxml import cssselect -from lxml.html.setmixin import SetMixin +from lxml.html._setmixin import SetMixin try: from UserDict import DictMixin except ImportError: @@ -818,7 +821,8 @@ def keys(self): names = sets.Set() for el in self: - names.add(el.name) + if el.name is not None: + names.add(el.name) return list(names) def __iter__(self): Modified: lxml/branch/lxml-2.0/src/lxml/html/builder.py ============================================================================== --- lxml/branch/lxml-2.0/src/lxml/html/builder.py (original) +++ lxml/branch/lxml-2.0/src/lxml/html/builder.py Thu Feb 14 15:52:28 2008 @@ -1,5 +1,10 @@ +# -------------------------------------------------------------------- +# The ElementTree toolkit is +# Copyright (c) 1999-2004 by Fredrik Lundh +# -------------------------------------------------------------------- + """ -HTML specialisation of ``builder.py`` by Fredrik Lundh +A set of HTML generator tags for building HTML documents. Usage:: Modified: lxml/branch/lxml-2.0/src/lxml/html/clean.py ============================================================================== --- lxml/branch/lxml-2.0/src/lxml/html/clean.py (original) +++ lxml/branch/lxml-2.0/src/lxml/html/clean.py Thu Feb 14 15:52:28 2008 @@ -1,3 +1,9 @@ +"""A cleanup tool for HTML. + +Removes unwanted tags and content. See the `Cleaner` class for +details. 
+""" + import re import copy import urlparse Modified: lxml/branch/lxml-2.0/src/lxml/html/diff.py ============================================================================== --- lxml/branch/lxml-2.0/src/lxml/html/diff.py (original) +++ lxml/branch/lxml-2.0/src/lxml/html/diff.py Thu Feb 14 15:52:28 2008 @@ -738,29 +738,17 @@ If skip_outer is true, then don't serialize the outermost tag """ - - html_xsl = """\ - - - - - - -""" - transform = etree.XSLT(etree.XML(html_xsl)) assert not isinstance(el, basestring), ( "You should pass in an element, not a string like %r" % el) - html = str(transform(el)) + html = etree.tostring(el, method="html", encoding="UTF-8") if skip_outer: # Get rid of the extra starting tag: html = html[html.find('>')+1:] - if skip_outer: # Get rid of the extra end tag: html = html[:html.rfind('<')] - if skip_outer: return html.strip() else: - return html.lstrip() + return html def _fixup_ins_del_tags(doc): """fixup_ins_del_tags that works on an lxml document in-place Deleted: /lxml/branch/lxml-2.0/src/lxml/html/setmixin.py ============================================================================== --- /lxml/branch/lxml-2.0/src/lxml/html/setmixin.py Thu Feb 14 15:52:28 2008 +++ (empty file) @@ -1,115 +0,0 @@ -class SetMixin(object): - - """ - Mix-in for sets. 
You must define __iter__, add, remove - """ - - def __len__(self): - length = 0 - for item in self: - length += 1 - return length - - def __contains__(self, item): - for has_item in self: - if item == has_item: - return True - return False - - def issubset(self, other): - for item in other: - if item not in self: - return False - return True - - __le__ = issubset - - def issuperset(self, other): - for item in self: - if item not in other: - return False - return True - - __ge__ = issuperset - - def union(self, other): - return self | other - - def __or__(self, other): - new = self.copy() - new |= other - return new - - def intersection(self, other): - return self & other - - def __and__(self, other): - new = self.copy() - new &= other - return new - - def difference(self, other): - return self - other - - def __sub__(self, other): - new = self.copy() - new -= other - return new - - def symmetric_difference(self, other): - return self ^ other - - def __xor__(self, other): - new = self.copy() - new ^= other - return new - - def copy(self): - return set(self) - - def update(self, other): - for item in other: - self.add(item) - - def __ior__(self, other): - self.update(other) - return self - - def intersection_update(self, other): - for item in self: - if item not in other: - self.remove(item) - - def __iand__(self, other): - self.intersection_update(other) - return self - - def difference_update(self, other): - for item in other: - if item in self: - self.remove(item) - - def __isub__(self, other): - self.difference_update(other) - return self - - def symmetric_difference_update(self, other): - for item in other: - if item in self: - self.remove(item) - else: - self.add(item) - - def __ixor__(self, other): - self.symmetric_difference_update(other) - return self - - def discard(self, item): - try: - self.remove(item) - except KeyError: - pass - - def clear(self): - for item in list(self): - self.remove(item) Modified: 
lxml/branch/lxml-2.0/src/lxml/html/tests/test_diff.txt ============================================================================== --- lxml/branch/lxml-2.0/src/lxml/html/tests/test_diff.txt (original) +++ lxml/branch/lxml-2.0/src/lxml/html/tests/test_diff.txt Thu Feb 14 15:52:28 2008 @@ -204,10 +204,7 @@
Some text and

more text

>>> pfixup(''' ...
One tableMore stuff
''') - - - -
One tableMore stuff
+
One tableMore stuff
Testing split_unbalanced:: Modified: lxml/branch/lxml-2.0/src/lxml/html/tests/test_forms.txt ============================================================================== --- lxml/branch/lxml-2.0/src/lxml/html/tests/test_forms.txt (original) +++ lxml/branch/lxml-2.0/src/lxml/html/tests/test_forms.txt Thu Feb 14 15:52:28 2008 @@ -141,3 +141,24 @@ single_checkbox2: 'good' check_group: +>>> import lxml.html +>>> tree = lxml.html.fromstring(''' +... +...
+... +... +...
+... +... ''') +>>> tree # doctest: +ELLIPSIS + +>>> tree.forms[0] # doctest: +ELLIPSIS + +>>> tree.forms[0].fields # doctest: +NOPARSE_MARKUP + +>>> tree.forms[0].fields.keys() +['foo'] +>>> tree.forms[0].fields.items() +[('foo', 'bar')] +>>> tree.forms[0].fields.values() +['bar'] Modified: lxml/branch/lxml-2.0/src/lxml/html/usedoctest.py ============================================================================== --- lxml/branch/lxml-2.0/src/lxml/html/usedoctest.py (original) +++ lxml/branch/lxml-2.0/src/lxml/html/usedoctest.py Thu Feb 14 15:52:28 2008 @@ -1,3 +1,13 @@ +"""Doctest module for HTML comparison. + +Usage:: + + >>> import lxml.html.usedoctest + >>> # now do your HTML doctests ... + +See `lxml.doctestcompare`. +""" + from lxml import doctestcompare doctestcompare.temp_install(html=True, del_module=__name__) Deleted: /lxml/branch/lxml-2.0/src/lxml/htmlbuilder.py ============================================================================== --- /lxml/branch/lxml-2.0/src/lxml/htmlbuilder.py Thu Feb 14 15:52:28 2008 +++ (empty file) @@ -1,154 +0,0 @@ -# -# HTML specialisation of ``builder.py`` by Fredrik Lundh -# -# -------------------------------------------------------------------- -# The ElementTree toolkit is -# -# Copyright (c) 1999-2004 by Fredrik Lundh -# -# By obtaining, using, and/or copying this software and/or its -# associated documentation, you agree that you have read, understood, -# and will comply with the following terms and conditions: -# -# Permission to use, copy, modify, and distribute this software and -# its associated documentation for any purpose and without fee is -# hereby granted, provided that the above copyright notice appears in -# all copies, and that both that copyright notice and this permission -# notice appear in supporting documentation, and that the name of -# Secret Labs AB or the author not be used in advertising or publicity -# pertaining to distribution of the software without specific, written -# prior 
permission. -# -# SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD -# TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT- -# ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR -# BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY -# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, -# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS -# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE -# OF THIS SOFTWARE. -# -------------------------------------------------------------------- - -""" -Usage:: - - >>> from lxml.htmlbuilder import * - >>> html = HTML( - ... HEAD( TITLE("Hello World") ), - ... BODY( CLASS("main"), - ... H1("Hello World !") - ... ) - ... ) - - >>> import lxml.etree - >>> print lxml.etree.tostring(html, pretty_print=True) - - - Hello World - - -

Hello World !

- - - -""" - -from builder import E - -# elements -A = E.a # anchor -ABBR = E.abbr # abbreviated form (e.g., WWW, HTTP, etc.) -ACRONYM = E.acronym # -ADDRESS = E.address # information on author -APPLET = E.applet # Java applet (DEPRECATED) -AREA = E.area # client-side image map area -B = E.b # bold text style -BASE = E.base # document base URI -BASEFONT = E.basefont # base font size (DEPRECATED) -BDO = E.bdo # I18N BiDi over-ride -BIG = E.big # large text style -BLOCKQUOTE = E.blockquote # long quotation -BODY = E.body # document body -BR = E.br # forced line break -BUTTON = E.button # push button -CAPTION = E.caption # table caption -CENTER = E.center # shorthand for DIV align=center (DEPRECATED) -CITE = E.cite # citation -CODE = E.code # computer code fragment -COL = E.col # table column -COLGROUP = E.colgroup # table column group -DD = E.dd # definition description -DEL = getattr(E, 'del') # deleted text -DFN = E.dfn # instance definition -DIR = E.dir # directory list (DEPRECATED) -DIV = E.div # generic language/style container -DL = E.dl # definition list -DT = E.dt # definition term -EM = E.em # emphasis -FIELDSET = E.fieldset # form control group -FONT = E.font # local change to font (DEPRECATED) -FORM = E.form # interactive form -FRAME = E.frame # subwindow -FRAMESET = E.frameset # window subdivision -H1 = E.h1 # heading -H2 = E.h2 # heading -H3 = E.h3 # heading -H4 = E.h4 # heading -H5 = E.h5 # heading -H6 = E.h6 # heading -HEAD = E.head # document head -HR = E.hr # horizontal rule -HTML = E.html # document root element -I = E.i # italic text style -IFRAME = E.iframe # inline subwindow -IMG = E.img # Embedded image -INPUT = E.input # form control -INS = E.ins # inserted text -ISINDEX = E.isindex # single line prompt (DEPRECATED) -KBD = E.kbd # text to be entered by the user -LABEL = E.label # form field label text -LEGEND = E.legend # fieldset legend -LI = E.li # list item -LINK = E.link # a media-independent link -MAP = E.map # client-side image map -MENU 
= E.menu # menu list (DEPRECATED) -META = E.meta # generic metainformation -NOFRAMES = E.noframes # alternate content container for non frame-based rendering -NOSCRIPT = E.noscript # alternate content container for non script-based rendering -OBJECT = E.object # generic embedded object -OL = E.ol # ordered list -OPTGROUP = E.optgroup # option group -OPTION = E.option # selectable choice -P = E.p # paragraph -PARAM = E.param # named property value -PRE = E.pre # preformatted text -Q = E.q # short inline quotation -S = E.s # strike-through text style (DEPRECATED) -SAMP = E.samp # sample program output, scripts, etc. -SCRIPT = E.script # script statements -SELECT = E.select # option selector -SMALL = E.small # small text style -SPAN = E.span # generic language/style container -STRIKE = E.strike # strike-through text (DEPRECATED) -STRONG = E.strong # strong emphasis -STYLE = E.style # style info -SUB = E.sub # subscript -SUP = E.sup # superscript -TABLE = E.table # -TBODY = E.tbody # table body -TD = E.td # table data cell -TEXTAREA = E.textarea # multi-line text field -TFOOT = E.tfoot # table footer -TH = E.th # table header cell -THEAD = E.thead # table header -TITLE = E.title # document title -TR = E.tr # table row -TT = E.tt # teletype or monospaced text style -U = E.u # underlined text style (DEPRECATED) -UL = E.ul # unordered list -VAR = E.var # instance of a variable or program argument - -# attributes (only reserved words are included here) -ATTR = dict -def CLASS(v): return {'class': v} -def FOR(v): return {'for': v} Modified: lxml/branch/lxml-2.0/src/lxml/iterparse.pxi ============================================================================== --- lxml/branch/lxml-2.0/src/lxml/iterparse.pxi (original) +++ lxml/branch/lxml-2.0/src/lxml/iterparse.pxi Thu Feb 14 15:52:28 2008 @@ -239,9 +239,12 @@ origEnd(ctxt, name) cdef class iterparse(_BaseParser): - """Incremental parser. Parses XML into a tree and generates tuples - (event, element) in a SAX-like fashion. 
``event`` is any of 'start', - 'end', 'start-ns', 'end-ns'. + """iterparse(self, source, events=("end",), tag=None, attribute_defaults=False, dtd_validation=False, load_dtd=False, no_network=True, remove_blank_text=False, remove_comments=False, remove_pis=False, encoding=None, html=False, schema=None) + Incremental parser. + + Parses XML into a tree and generates tuples (event, element) in a + SAX-like fashion. ``event`` is any of 'start', 'end', 'start-ns', + 'end-ns'. For 'start' and 'end', ``element`` is the Element that the parser just found opening or closing. For 'start-ns', it is a tuple (prefix, URI) of @@ -262,17 +265,17 @@ attribute default values are requested. Available boolean keyword arguments: - * attribute_defaults - read default attributes from DTD - * dtd_validation - validate (if DTD is available) - * load_dtd - use DTD for parsing - * no_network - prevent network access for related files - * remove_blank_text - discard blank text nodes - * remove_comments - discard comments - * remove_pis - discard processing instructions + - attribute_defaults - read default attributes from DTD + - dtd_validation - validate (if DTD is available) + - load_dtd - use DTD for parsing + - no_network - prevent network access for related files + - remove_blank_text - discard blank text nodes + - remove_comments - discard comments + - remove_pis - discard processing instructions Other keyword arguments: - * encoding - override the document encoding - * schema - an XMLSchema to validate against + - encoding - override the document encoding + - schema - an XMLSchema to validate against """ cdef object _source cdef readonly object root @@ -397,8 +400,10 @@ cdef class iterwalk: - """A tree walker that generates events from an existing tree as if it was - parsing XML data with ``iterparse()``. + """iterwalk(self, element_or_tree, events=("end",), tag=None) + + A tree walker that generates events from an existing tree as if it + was parsing XML data with ``iterparse()``. 
""" cdef object _node_stack cdef object _pop_node Modified: lxml/branch/lxml-2.0/src/lxml/lxml.etree.pyx ============================================================================== --- lxml/branch/lxml-2.0/src/lxml/lxml.etree.pyx (original) +++ lxml/branch/lxml-2.0/src/lxml/lxml.etree.pyx Thu Feb 14 15:52:28 2008 @@ -1,3 +1,9 @@ +"""The ``lxml.etree`` module implements the extended ElementTree API +for XML. +""" + +__docformat__ = "restructuredtext en" + cimport tree, python, config from tree cimport xmlDoc, xmlNode, xmlAttr, xmlNs, _isElement, _getNs from python cimport callable, _cstr, _isString @@ -214,7 +220,9 @@ cdef class QName: - """QName wrapper. + """QName(text_or_uri, tag=None) + + QName wrapper. Pass a tag name by itself or a namespace URI and a tag name to create a qualified name. The ``text`` property holds the @@ -510,7 +518,9 @@ cdef public class _Element [ type LxmlElementType, object LxmlElement ]: - """Element class. References a document object and a libxml node. + """Element class. + + References a document object and a libxml node. By pointing to a Document instance, a reference is kept to _Document as long as there is some pointer to a node in it. @@ -522,7 +532,9 @@ cdef object _attrib def _init(self): - """Called after object initialisation. Custom subclasses may override + """_init(self) + + Called after object initialisation. Custom subclasses may override this if they recursively call _init() in the superclasses. """ @@ -537,7 +549,9 @@ # MANIPULATORS def __setitem__(self, x, value): - """Replaces the given subelement index or slice. + """__setitem__(self, x, value) + + Replaces the given subelement index or slice. """ cdef xmlNode* c_node cdef xmlNode* c_next @@ -571,7 +585,9 @@ moveNodeToDocument(self._doc, c_node) def __delitem__(self, x): - """Deletes the given subelement or a slice. + """__delitem__(self, x) + + Deletes the given subelement or a slice. 
""" cdef xmlNode* c_node cdef xmlNode* c_next @@ -599,9 +615,11 @@ _removeNode(self._doc, c_node) def __deepcopy__(self, memo): + "__deepcopy__(self, memo)" return self.__copy__() def __copy__(self): + "__copy__(self)" cdef xmlDoc* c_doc cdef xmlNode* c_node cdef _Document new_doc @@ -619,17 +637,23 @@ return _elementFactory(new_doc, c_node) def set(self, key, value): - """Sets an element attribute. + """set(self, key, value) + + Sets an element attribute. """ _setAttributeValue(self, key, value) def append(self, _Element element not None): - """Adds a subelement to the end of this element. + """append(self, element) + + Adds a subelement to the end of this element. """ _appendChild(self, element) def addnext(self, _Element element): - """Adds the element as a following sibling directly after this + """addnext(self, element) + + Adds the element as a following sibling directly after this element. This is normally used to set a processing instruction or comment after @@ -644,7 +668,9 @@ _appendSibling(self, element) def addprevious(self, _Element element): - """Adds the element as a preceding sibling directly before this + """addprevious(self, element) + + Adds the element as a preceding sibling directly before this element. This is normally used to set a processing instruction or comment @@ -659,13 +685,17 @@ _prependSibling(self, element) def extend(self, elements): - """Extends the current children by the elements in the iterable. + """extend(self, elements) + + Extends the current children by the elements in the iterable. """ for element in elements: _appendChild(self, element) def clear(self): - """Resets an element. This function removes all subelements, clears + """clear(self) + + Resets an element. This function removes all subelements, clears all attributes and sets the text and tail properties to None. 
""" cdef xmlAttr* c_attr @@ -693,7 +723,9 @@ c_node = c_node_next def insert(self, index, _Element element not None): - """Inserts a subelement at the given position in this element + """insert(self, index, element) + + Inserts a subelement at the given position in this element """ cdef xmlNode* c_node cdef xmlNode* c_next @@ -707,7 +739,9 @@ moveNodeToDocument(self._doc, element._c_node) def remove(self, _Element element not None): - """Removes a matching subelement. Unlike the find methods, this + """remove(self, element) + + Removes a matching subelement. Unlike the find methods, this method compares elements based on identity, not on tag value or contents. """ @@ -724,7 +758,9 @@ def replace(self, _Element old_element not None, _Element new_element not None): - """Replaces a subelement with the element passed as second argument. + """replace(self, old_element, new_element) + + Replaces a subelement with the element passed as second argument. """ cdef xmlNode* c_old_node cdef xmlNode* c_old_next @@ -862,6 +898,7 @@ # ACCESSORS def __repr__(self): + "__repr__(self)" return "" % (self.tag, id(self)) def __getitem__(self, x): @@ -901,11 +938,14 @@ return _elementFactory(self._doc, c_node) def __len__(self): - """Returns the number of subelements. + """__len__(self) + + Returns the number of subelements. """ return _countElements(self._c_node.children) def __nonzero__(self): + "__nonzero__(self)" import warnings warnings.warn( "The behavior of this method will change in future versions. 
" @@ -916,6 +956,7 @@ return _hasChild(self._c_node) def __contains__(self, element): + "__contains__(self, element)" cdef xmlNode* c_node if not isinstance(element, _Element): return 0 @@ -923,13 +964,17 @@ return c_node is not NULL and c_node.parent is self._c_node def __iter__(self): + "__iter__(self)" return ElementChildIterator(self) def __reversed__(self): + "__reversed__(self)" return ElementChildIterator(self, reversed=True) def index(self, _Element child not None, start=None, stop=None): - """Find the position of the child within the parent. + """index(self, child, start=None, stop=None) + + Find the position of the child within the parent. This method is not part of the original ElementTree API. """ @@ -1012,40 +1057,52 @@ raise ValueError("list.index(x): x not in list") def get(self, key, default=None): - """Gets an element attribute. + """get(self, key, default=None) + + Gets an element attribute. """ return _getAttributeValue(self, key, default) def keys(self): - """Gets a list of attribute names. The names are returned in an + """keys(self) + + Gets a list of attribute names. The names are returned in an arbitrary order (just like for an ordinary Python dictionary). """ return _collectAttributes(self._c_node, 1) def values(self): - """Gets element attribute values as a sequence of strings. The + """values(self) + + Gets element attribute values as a sequence of strings. The attributes are returned in an arbitrary order. """ return _collectAttributes(self._c_node, 2) def items(self): - """Gets element attributes, as a sequence. The attributes are returned in + """items(self) + + Gets element attributes, as a sequence. The attributes are returned in an arbitrary order. """ return _collectAttributes(self._c_node, 3) def getchildren(self): - """Returns all direct children. The elements are returned in document + """getchildren(self) + + Returns all direct children. The elements are returned in document order. 
- @deprecated: Note that this method has been deprecated as of - ElementTree 1.3 and lxml 2.0. New code should use - ``list(element)`` or simply iterate over elements. + :deprecated: Note that this method has been deprecated as of + ElementTree 1.3 and lxml 2.0. New code should use + ``list(element)`` or simply iterate over elements. """ return _collectChildren(self) def getparent(self): - """Returns the parent of this element or None for the root element. + """getparent(self) + + Returns the parent of this element or None for the root element. """ cdef xmlNode* c_node c_node = _parentElement(self._c_node) @@ -1055,7 +1112,9 @@ return _elementFactory(self._doc, c_node) def getnext(self): - """Returns the following sibling of this element or None. + """getnext(self) + + Returns the following sibling of this element or None. """ cdef xmlNode* c_node c_node = _nextElement(self._c_node) @@ -1064,7 +1123,9 @@ return None def getprevious(self): - """Returns the preceding sibling of this element or None. + """getprevious(self) + + Returns the preceding sibling of this element or None. """ cdef xmlNode* c_node c_node = _previousElement(self._c_node) @@ -1073,7 +1134,9 @@ return None def itersiblings(self, tag=None, *, preceding=False): - """Iterate over the following or preceding siblings of this element. + """itersiblings(self, tag=None, preceding=False) + + Iterate over the following or preceding siblings of this element. The direction is determined by the 'preceding' keyword which defaults to False, i.e. forward iteration over the following siblings. The @@ -1083,7 +1146,9 @@ return SiblingsIterator(self, tag, preceding=preceding) def iterancestors(self, tag=None): - """Iterate over the ancestors of this element (from parent to parent). + """iterancestors(self, tag=None) + + Iterate over the ancestors of this element (from parent to parent). The generated elements can be restricted to a specific tag name with the 'tag' keyword. 
@@ -1091,7 +1156,9 @@ return AncestorsIterator(self, tag) def iterdescendants(self, tag=None): - """Iterate over the descendants of this element in document order. + """iterdescendants(self, tag=None) + + Iterate over the descendants of this element in document order. As opposed to ``el.iter()``, this iterator does not yield the element itself. The generated elements can be restricted to a specific tag @@ -1100,7 +1167,9 @@ return ElementDepthFirstIterator(self, tag, inclusive=False) def iterchildren(self, tag=None, *, reversed=False): - """Iterate over the children of this element. + """iterchildren(self, tag=None, reversed=False) + + Iterate over the children of this element. As opposed to using normal iteration on this element, the generated elements can be restricted to a specific tag name with the 'tag' @@ -1109,7 +1178,9 @@ return ElementChildIterator(self, tag, reversed=reversed) def getroottree(self): - """Return an ElementTree for the root node of the document that + """getroottree(self) + + Return an ElementTree for the root node of the document that contains this element. This is the same as following element.getparent() up the tree until it @@ -1118,7 +1189,9 @@ return _elementTreeFactory(self._doc, None) def getiterator(self, tag=None): - """Returns a sequence or iterator of all elements in the subtree in + """getiterator(self, tag=None) + + Returns a sequence or iterator of all elements in the subtree in document order (depth first pre-order), starting with this element. @@ -1128,18 +1201,20 @@ You can also pass the Element, Comment, ProcessingInstruction and Entity factory functions to look only for the specific element type. - @deprecated: Note that this method is deprecated as of - ElementTree 1.3 and lxml 2.0. It returns an iterator in lxml, - which diverges from the original ElementTree behaviour. If - you want an efficient iterator, use the ``element.iter()`` - method instead. 
You should only use this method in new code - if you require backwards compatibility with older versions of - lxml or ElementTree. + :deprecated: Note that this method is deprecated as of + ElementTree 1.3 and lxml 2.0. It returns an iterator in + lxml, which diverges from the original ElementTree + behaviour. If you want an efficient iterator, use the + ``element.iter()`` method instead. You should only use this + method in new code if you require backwards compatibility + with older versions of lxml or ElementTree. """ return ElementDepthFirstIterator(self, tag) def iter(self, tag=None): - """Iterate over all elements in the subtree in document order (depth + """iter(self, tag=None) + + Iterate over all elements in the subtree in document order (depth first pre-order), starting with this element. Can be restricted to find only elements with a specific tag @@ -1151,7 +1226,9 @@ return ElementDepthFirstIterator(self, tag) def itertext(self, tag=None, *, with_tail=True): - """Iterates over the text content of a subtree. + """itertext(self, tag=None, with_tail=True) + + Iterates over the text content of a subtree. You can pass the ``tag`` keyword argument to restrict text content to a specific tag name. @@ -1162,41 +1239,53 @@ return ElementTextIterator(self, tag, with_tail=with_tail) def makeelement(self, _tag, attrib=None, nsmap=None, **_extra): - """Creates a new element associated with the same document. + """makeelement(self, _tag, attrib=None, nsmap=None, **_extra) + + Creates a new element associated with the same document. """ return _makeElement(_tag, NULL, self._doc, None, None, None, attrib, nsmap, _extra) def find(self, path): - """Finds the first matching subelement, by tag name or path. + """find(self, path) + + Finds the first matching subelement, by tag name or path. 
""" if isinstance(path, QName): path = (path).text return _elementpath.find(self, path) def findtext(self, path, default=None): - """Finds text for the first matching subelement, by tag name or path. + """findtext(self, path, default=None) + + Finds text for the first matching subelement, by tag name or path. """ if isinstance(path, QName): path = (path).text return _elementpath.findtext(self, path, default) def findall(self, path): - """Finds all matching subelements, by tag name or path. + """findall(self, path) + + Finds all matching subelements, by tag name or path. """ if isinstance(path, QName): path = (path).text return _elementpath.findall(self, path) def iterfind(self, path): - """Iterates over all matching subelements, by tag name or path. + """iterfind(self, path) + + Iterates over all matching subelements, by tag name or path. """ if isinstance(path, QName): path = (path).text return _elementpath.iterfind(self, path) def xpath(self, _path, *, namespaces=None, extensions=None, **_variables): - """Evaluate an xpath expression using the element as context node. + """xpath(self, _path, namespaces=None, extensions=None, **_variables) + + Evaluate an xpath expression using the element as context node. 
""" evaluator = XPathElementEvaluator(self, namespaces=namespaces, extensions=extensions) @@ -1260,18 +1349,19 @@ raise TypeError("this element does not have children or attributes") def set(self, key, value): + "set(self, key, value)" self._raiseImmutable() def append(self, value): + "append(self, value)" self._raiseImmutable() def insert(self, index, value): + "insert(self, index, value)" self._raiseImmutable() def __setitem__(self, index, value): - self._raiseImmutable() - - def __setslice__(self, start, end, value): + "__setitem__(self, index, value)" self._raiseImmutable() property attrib: @@ -1297,24 +1387,30 @@ # ACCESSORS def __getitem__(self, x): + "__getitem__(self, x)" if python.PySlice_Check(x): return [] else: raise IndexError("list index out of range") def __len__(self): + "__len__(self)" return 0 def get(self, key, default=None): + "get(self, key, default=None)" return None def keys(self): + "keys(self)" return [] def items(self): + "items(self)" return [] def values(self): + "values(self)" return [] cdef class _Comment(__ContentOnlyElement): @@ -1393,7 +1489,9 @@ "ElementTree not initialized, missing root" def parse(self, source, _BaseParser parser=None): - """Updates self with the content of source and returns its root + """parse(self, source, parser=None) + + Updates self with the content of source and returns its root """ cdef _Document doc doc = _parseDocument(source, parser) @@ -1405,7 +1503,9 @@ return self._context_node def _setroot(self, _Element root not None): - """Relocate the ElementTree to a new root node. + """_setroot(self, root) + + Relocate the ElementTree to a new root node. """ if root._c_node.type != tree.XML_ELEMENT_NODE: raise TypeError("Only elements can be the root of an ElementTree") @@ -1413,7 +1513,9 @@ self._doc = None def getroot(self): - """Gets the root element for this tree. + """getroot(self) + + Gets the root element for this tree. 
""" return self._context_node @@ -1448,7 +1550,10 @@ def write(self, file, *, encoding=None, method="xml", pretty_print=False, xml_declaration=None, with_tail=True): - """Write the tree to a file or file-like object. + """write(self, file, encoding=None, method="xml", + pretty_print=False, xml_declaration=None, with_tail=True) + + Write the tree to a file or file-like object. Defaults to ASCII encoding and writing a declaration as needed. @@ -1473,7 +1578,9 @@ write_declaration, 1, pretty_print, with_tail) def getpath(self, _Element element not None): - """Returns a structural, absolute XPath expression to find that element. + """getpath(self, element) + + Returns a structural, absolute XPath expression to find that element. """ cdef _Document doc cdef xmlDoc* c_doc @@ -1491,7 +1598,9 @@ return path def getiterator(self, tag=None): - """Returns a sequence or iterator of all elements in document order + """getiterator(self, tag=None) + + Returns a sequence or iterator of all elements in document order (depth first pre-order), starting with the root element. Can be restricted to find only elements with a specific tag @@ -1501,13 +1610,13 @@ You can also pass the Element, Comment, ProcessingInstruction and Entity factory functions to look only for the specific element type. - @deprecated: Note that this method is deprecated as of - ElementTree 1.3 and lxml 2.0. It returns an iterator in lxml, - which diverges from the original ElementTree behaviour. If - you want an efficient iterator, use the ``tree.iter()`` method - instead. You should only use this method in new code if you - require backwards compatibility with older versions of lxml or - ElementTree. + :deprecated: Note that this method is deprecated as of + ElementTree 1.3 and lxml 2.0. It returns an iterator in + lxml, which diverges from the original ElementTree + behaviour. If you want an efficient iterator, use the + ``tree.iter()`` method instead. 
You should only use this + method in new code if you require backwards compatibility + with older versions of lxml or ElementTree. """ root = self.getroot() if root is None: @@ -1515,7 +1624,9 @@ return root.getiterator(tag) def iter(self, tag=None): - """Creates an iterator for the root element. The iterator loops over + """iter(self, tag=None) + + Creates an iterator for the root element. The iterator loops over all elements in this tree, in document order. """ root = self.getroot() @@ -1524,7 +1635,9 @@ return root.iter(tag) def find(self, path): - """Finds the first toplevel element with given tag. Same as + """find(self, path) + + Finds the first toplevel element with given tag. Same as ``tree.getroot().find(path)``. """ self._assertHasRoot() @@ -1534,7 +1647,9 @@ return root.find(path) def findtext(self, path, default=None): - """Finds the text for the first element matching the ElementPath + """findtext(self, path, default=None) + + Finds the text for the first element matching the ElementPath expression. Same as getroot().findtext(path) """ self._assertHasRoot() @@ -1544,7 +1659,9 @@ return root.findtext(path, default) def findall(self, path): - """Finds all elements matching the ElementPath expression. Same as + """findall(self, path) + + Finds all elements matching the ElementPath expression. Same as getroot().findall(path). """ self._assertHasRoot() @@ -1554,7 +1671,9 @@ return root.findall(path) def iterfind(self, path): - """Iterates over all elements matching the ElementPath expression. + """iterfind(self, path) + + Iterates over all elements matching the ElementPath expression. Same as getroot().finditer(path). """ self._assertHasRoot() @@ -1564,7 +1683,9 @@ return root.iterfind(path) def xpath(self, _path, *, namespaces=None, extensions=None, **_variables): - """XPath evaluate in context of document. + """xpath(self, _path, namespaces=None, extensions=None, **_variables) + + XPath evaluate in context of document. 
``namespaces`` is an optional dictionary with prefix to namespace URI mappings, used by XPath. ``extensions`` defines additional extension @@ -1585,7 +1706,9 @@ return evaluator.evaluate(_path, **_variables) def xslt(self, _xslt, extensions=None, access_control=None, **_kw): - """Transform this document using other document. + """xslt(self, _xslt, extensions=None, access_control=None, **_kw) + + Transform this document using other document. xslt is a tree that should be XSLT keyword parameters are XSLT transformation parameters. @@ -1602,7 +1725,9 @@ return style(self, **_kw) def relaxng(self, relaxng): - """Validate this document using other document. + """relaxng(self, relaxng) + + Validate this document using other document. The relaxng argument is a tree that should contain a Relax NG schema. @@ -1618,7 +1743,9 @@ return schema.validate(self) def xmlschema(self, xmlschema): - """Validate this document using other document. + """xmlschema(self, xmlschema) + + Validate this document using other document. The xmlschema argument is a tree that should contain an XML Schema. @@ -1634,7 +1761,9 @@ return schema.validate(self) def xinclude(self): - """Process the XInclude nodes in this document and include the + """xinclude(self) + + Process the XInclude nodes in this document and include the referenced XML fragments. There is support for loading files through the file system, HTTP and @@ -1648,7 +1777,9 @@ XInclude()(self._context_node) def write_c14n(self, file): - """C14N write of document. Always writes UTF-8. + """write_c14n(self, file) + + C14N write of document. Always writes UTF-8. """ self._assertHasRoot() _tofilelikeC14N(file, self._context_node) @@ -1669,9 +1800,7 @@ cdef class _Attrib: - """A proxy for the ``Element.attrib`` property. - - Behaves as a normal Python dict. + """A dict-like proxy for the ``Element.attrib`` property. 
""" cdef _Element _element def __init__(self, _Element element not None): @@ -1898,7 +2027,9 @@ return current_node cdef class ElementChildIterator(_ElementIterator): - "Iterates over the children of an element." + """ElementChildIterator(self, node, tag=None, reversed=False) + Iterates over the children of an element. + """ def __init__(self, _Element node not None, tag=None, *, reversed=False): cdef xmlNode* c_node self._initTagMatch(tag) @@ -1919,7 +2050,8 @@ self._node = _elementFactory(node._doc, c_node) cdef class SiblingsIterator(_ElementIterator): - """Iterates over the siblings of an element. + """SiblingsIterator(self, node, tag=None, preceding=False) + Iterates over the siblings of an element. You can pass the boolean keyword ``preceding`` to specify the direction. """ @@ -1932,18 +2064,24 @@ self._storeNext(node) cdef class AncestorsIterator(_ElementIterator): - "Iterates over the ancestors of an element (from parent to parent)." + """AncestorsIterator(self, node, tag=None) + Iterates over the ancestors of an element (from parent to parent). + """ def __init__(self, _Element node not None, tag=None): self._initTagMatch(tag) self._next_element = _parentElement self._storeNext(node) cdef class ElementDepthFirstIterator(_ElementTagMatcher): - """Iterates over an element and its sub-elements in document order (depth - first pre-order). Note that this also includes comments, entities and - processing instructions. To filter them out, check if the ``tag`` - property of the returned element is a string (i.e. not None and not a - factory function), or pass the ``Element`` factory for the ``tag`` keyword. + """ElementDepthFirstIterator(self, node, tag=None, inclusive=True) + Iterates over an element and its sub-elements in document order (depth + first pre-order). + + Note that this also includes comments, entities and processing + instructions. To filter them out, check if the ``tag`` property + of the returned element is a string (i.e. 
not None and not a + factory function), or pass the ``Element`` factory for the ``tag`` + keyword. If the optional ``tag`` argument is not None, the iterator returns only the elements that match the respective name and namespace. @@ -2006,7 +2144,8 @@ return NULL cdef class ElementTextIterator: - """Iterates over the text content of a subtree. + """ElementTextIterator(self, element, tag=None, with_tail=True) + Iterates over the text content of a subtree. You can pass the ``tag`` keyword argument to restrict text content to a specific tag name. @@ -2060,7 +2199,9 @@ # module-level API for ElementTree def Element(_tag, attrib=None, nsmap=None, **_extra): - """Element factory. This function returns an object implementing the + """Element(_tag, attrib=None, nsmap=None, **_extra) + + Element factory. This function returns an object implementing the Element interface. """ ### also look at _Element.makeelement() and _BaseParser.makeelement() ### @@ -2068,7 +2209,9 @@ attrib, nsmap, _extra) def Comment(text=None): - """Comment element factory. This factory function creates a special element that will + """Comment(text=None) + + Comment element factory. This factory function creates a special element that will be serialized as an XML comment. """ cdef _Document doc @@ -2085,7 +2228,9 @@ return _elementFactory(doc, c_node) def ProcessingInstruction(target, text=None): - """ProcessingInstruction element factory. This factory function creates a + """ProcessingInstruction(target, text=None) + + ProcessingInstruction element factory. This factory function creates a special element that will be serialized as an XML processing instruction. """ cdef _Document doc @@ -2105,7 +2250,9 @@ PI = ProcessingInstruction def Entity(name): - """Entity factory. This factory function creates a special element + """Entity(name) + + Entity factory. This factory function creates a special element that will be serialized as an XML entity reference or character reference. 
Note, however, that entities will not be automatically declared in the document. A document that uses entity references @@ -2130,13 +2277,17 @@ def SubElement(_Element _parent not None, _tag, attrib=None, nsmap=None, **_extra): - """Subelement factory. This function creates an element instance, and + """SubElement(_parent, _tag, attrib=None, nsmap=None, **_extra) + + Subelement factory. This function creates an element instance, and appends it to an existing element. """ return _makeSubElement(_parent, _tag, None, None, attrib, nsmap, _extra) def ElementTree(_Element element=None, *, file=None, _BaseParser parser=None): - """ElementTree wrapper class. + """ElementTree(element=None, file=None, parser=None) + + ElementTree wrapper class. """ cdef xmlNode* c_next cdef xmlNode* c_node @@ -2159,7 +2310,9 @@ return _elementTreeFactory(doc, element) def HTML(text, _BaseParser parser=None, *, base_url=None): - """Parses an HTML document from a string constant. This function can be used + """HTML(text, parser=None, base_url=None) + + Parses an HTML document from a string constant. This function can be used to embed "HTML literals" in Python code. To override the parser with a different ``HTMLParser`` you can pass it to @@ -2181,7 +2334,9 @@ return result_container.result def XML(text, _BaseParser parser=None, *, base_url=None): - """Parses an XML document from a string constant. This function can be used + """XML(text, parser=None, base_url=None) + + Parses an XML document from a string constant. This function can be used to embed "XML literals" in Python code, like in >>> root = etree.XML("") @@ -2205,7 +2360,9 @@ return result_container.result def fromstring(text, _BaseParser parser=None, *, base_url=None): - """Parses an XML document from a string. + """fromstring(text, parser=None, base_url=None) + + Parses an XML document from a string. To override the default parser with a different parser you can pass it to the ``parser`` keyword argument. 
@@ -2222,7 +2379,9 @@ return result_container.result def fromstringlist(strings, _BaseParser parser=None): - """Parses an XML document from a sequence of strings. + """fromstringlist(strings, parser=None) + + Parses an XML document from a sequence of strings. To override the default parser with a different parser you can pass it to the ``parser`` keyword argument. @@ -2236,19 +2395,26 @@ return parser.close() def iselement(element): - """Checks if an object appears to be a valid element object. + """iselement(element) + + Checks if an object appears to be a valid element object. """ return isinstance(element, _Element) def dump(_Element elem not None, *, pretty_print=True, with_tail=True): - """Writes an element tree or element structure to sys.stdout. This function + """dump(elem, pretty_print=True, with_tail=True) + + Writes an element tree or element structure to sys.stdout. This function should be used for debugging only. """ _dumpToFile(sys.stdout, elem._c_node, pretty_print, with_tail) def tostring(element_or_tree, *, encoding=None, method="xml", xml_declaration=None, pretty_print=False, with_tail=True): - """Serialize an element to an encoded string representation of its XML + """tostring(element_or_tree, encoding=None, method="xml", + xml_declaration=None, pretty_print=False, with_tail=True) + + Serialize an element to an encoded string representation of its XML tree. Defaults to ASCII encoding without XML declaration. This behaviour can be @@ -2295,7 +2461,9 @@ type(element_or_tree)) def tostringlist(element_or_tree, *args, **kwargs): - """Serialize an element to an encoded string representation of its XML + """tostringlist(element_or_tree, *args, **kwargs) + + Serialize an element to an encoded string representation of its XML tree, stored in a list of partial strings. This is purely for ElementTree 1.3 compatibility. 
The result is a @@ -2305,7 +2473,10 @@ def tounicode(element_or_tree, *, method="xml", pretty_print=False, with_tail=True): - """Serialize an element to the Python unicode representation of its XML + """tounicode(element_or_tree, method="xml", pretty_print=False, + with_tail=True) + + Serialize an element to the Python unicode representation of its XML tree. Note that the result does not carry an XML encoding declaration and is @@ -2321,7 +2492,7 @@ by passing the boolean ``with_tail`` option. This has no impact on the tail text of children, which will always be serialised. - @deprecated: use ``tostring(el, encoding=unicode)`` instead. + :deprecated: use ``tostring(el, encoding=unicode)`` instead. """ if isinstance(element_or_tree, _Element): return _tounicode(<_Element>element_or_tree, method, 0, pretty_print, @@ -2334,7 +2505,9 @@ type(element_or_tree)) def parse(source, _BaseParser parser=None): - """Return an ElementTree object loaded with source elements. If no parser + """parse(source, parser=None) + + Return an ElementTree object loaded with source elements. If no parser is provided as second argument, the default parser is used. """ cdef _Document doc @@ -2369,8 +2542,10 @@ # Validation class DocumentInvalid(LxmlError): - """Validation error. Raised by all document validators when their - ``assertValid(tree)`` method fails. + """Validation error. + + Raised by all document validators when their ``assertValid(tree)`` + method fails. """ pass @@ -2378,28 +2553,39 @@ "Base class for XML validators." cdef _ErrorLog _error_log def __init__(self): + "__init__(self)" self._error_log = _ErrorLog() def validate(self, etree): - """Validate the document using this schema. + """validate(self, etree) + + Validate the document using this schema. - Returns true if document is valid, false if not.""" + Returns true if document is valid, false if not. 
+ """ return self(etree) def assertValid(self, etree): - "Raises DocumentInvalid if the document does not comply with the schema." + """assertValid(self, etree) + + Raises `DocumentInvalid` if the document does not comply with the schema. + """ if not self(etree): raise DocumentInvalid(self._error_log._buildExceptionMessage( "Document does not comply with schema"), self._error_log) def assert_(self, etree): - "Raises AssertionError if the document does not comply with the schema." + """assert_(self, etree) + + Raises `AssertionError` if the document does not comply with the schema. + """ if not self(etree): raise AssertionError(self._error_log._buildExceptionMessage( "Document does not comply with schema")) property error_log: + "The log of validation errors and warnings." def __get__(self): return self._error_log.copy() Modified: lxml/branch/lxml-2.0/src/lxml/lxml.objectify.pyx ============================================================================== --- lxml/branch/lxml-2.0/src/lxml/lxml.objectify.pyx (original) +++ lxml/branch/lxml-2.0/src/lxml/lxml.objectify.pyx Thu Feb 14 15:52:28 2008 @@ -1,3 +1,7 @@ +"""The ``lxml.objectify`` module implements a Python object API for +XML. It is based on `lxml.etree`. +""" + from etreepublic cimport _Document, _Element, ElementBase from etreepublic cimport _ElementIterator, ElementClassLookup from etreepublic cimport elementFactory, import_lxml__etree, textOf @@ -153,7 +157,9 @@ return _countSiblings(self._c_node) def countchildren(self): - """Return the number of children of this element, regardless of their + """countchildren(self) + + Return the number of children of this element, regardless of their name. """ # copied from etree @@ -168,7 +174,9 @@ return c def getchildren(self): - """Returns a sequence of all direct children. The elements are + """getchildren(self) + + Returns a sequence of all direct children. The elements are returned in document order. 
""" cdef tree.xmlNode* c_node @@ -217,7 +225,9 @@ self.remove(child) def addattr(self, tag, value): - """Add a child value to the element. + """addattr(self, tag, value) + + Add a child value to the element. As opposed to append(), it sets a data value, not an element. """ @@ -242,13 +252,7 @@ if python._isString(key): return _lookupChildOrRaise(self, key) elif python.PySlice_Check(key): - python.PySlice_GetIndicesEx( - key, _countSiblings(self._c_node), - &start, &stop, &step, &slicelength) - if step < 0: - return list(self)[start:stop:step] - else: - return list(islice(self, start, stop, step)) + return list(self)[key] # normal item access c_self_node = self._c_node c_parent = c_self_node.parent @@ -269,7 +273,8 @@ def __setitem__(self, key, value): """Set the value of a sibling, counting from the first child of the - parent. + parent. Implements key assignment, item assignment and slice + assignment. * If argument is an integer, sets the sibling at that position. @@ -280,12 +285,7 @@ items to the siblings. """ cdef _Element element - cdef _Element parent - cdef _Element new_element - cdef tree.xmlNode* c_self_node - cdef tree.xmlNode* c_parent cdef tree.xmlNode* c_node - cdef Py_ssize_t start, stop, step, slicelength if python._isString(key): key = _buildChildTag(self, key) element = _lookupChild(self, key) @@ -295,48 +295,21 @@ _replaceElement(element, value) return - c_self_node = self._c_node - c_parent = c_self_node.parent - if c_parent is NULL: + if self._c_node.parent is NULL: # the 'root[i] = ...' 
case raise TypeError("assignment to root element is invalid") if python.PySlice_Check(key): # slice assignment - python.PySlice_GetIndicesEx( - key, _countSiblings(self._c_node), - &start, &stop, &step, &slicelength) - # replace existing items - new_items = iter(value) - if step < 0: - del_items = list(self)[start:stop:step] - else: - del_items = list(islice(self, start, stop, step)) - del_items = iter(del_items) - parent = self.getparent() - try: - for el in del_items: - item = new_items.next() - _replaceElement(el, item) - except StopIteration: - remove = parent.remove - remove(el) - for el in del_items: - remove(el) - return - else: - # append remaining new items - tag = self.tag - for item in new_items: - _appendValue(parent, tag, item) + _setSlice(key, self, value) else: # normal index assignment if key < 0: - c_node = c_parent.last + c_node = self._c_node.parent.last else: - c_node = c_parent.children + c_node = self._c_node.parent.children c_node = _findFollowingSibling( - c_node, tree._getNs(c_self_node), c_self_node.name, key) + c_node, tree._getNs(self._c_node), self._c_node.name, key) if c_node is NULL: raise IndexError(key) element = elementFactory(self._doc, c_node) @@ -368,18 +341,21 @@ parent.remove(sibling) def iterfind(self, path): + "iterfind(self, path)" # Reimplementation of Element.iterfind() to make it work without child # iteration. xpath = etree.ETXPath(path) return iter(xpath(self)) def findall(self, path): + "findall(self, path)" # Reimplementation of Element.findall() to make it work without child # iteration. xpath = etree.ETXPath(path) return xpath(self) def find(self, path): + "find(self, path)" # Reimplementation of Element.find() to make it work without child # iteration. result = self.findall(path) @@ -391,6 +367,7 @@ return None def findtext(self, path, default=None): + "findtext(self, path, default=None)" # Reimplementation of Element.findtext() to make it work without child # iteration. 
result = self.find(path) @@ -400,7 +377,9 @@ return default def descendantpaths(self, prefix=None): - """Returns a list of object path expressions for all descendants. + """descendantpaths(self, prefix=None) + + Returns a list of object path expressions for all descendants. """ if prefix is not None and not python._isString(prefix): prefix = '.'.join(prefix) @@ -538,6 +517,81 @@ PYTYPE_ATTRIBUTE_NAME) cetree.setNodeText(element._c_node, value) +cdef _setSlice(slice, _Element target, items): + cdef _Element parent + cdef tree.xmlNode* c_node + cdef Py_ssize_t c_step, c_start, pos + # collect existing slice + if (slice).step is None: + c_step = 1 + else: + c_step = (slice).step + if c_step == 0: + raise ValueError("Invalid slice") + del_items = target[slice] + + # collect new values + new_items = [] + tag = target.tag + for item in items: + if isinstance(item, _Element): + # deep copy the new element + new_element = cetree.deepcopyNodeToDocument( + target._doc, (<_Element>item)._c_node) + new_element.tag = tag + else: + new_element = cetree.makeElement( + tag, target._doc, None, None, None, None, None) + _setElementValue(new_element, item) + python.PyList_Append(new_items, new_element) + + # sanity check - raise what a list would raise + if c_step != 1 and \ + python.PyList_GET_SIZE(del_items) != python.PyList_GET_SIZE(new_items): + raise ValueError( + "attempt to assign sequence of size %d to extended slice of size %d" % ( + python.PyList_GET_SIZE(new_items), + python.PyList_GET_SIZE(del_items))) + + # replace existing items + pos = 0 + parent = target.getparent() + replace = parent.replace + while pos < python.PyList_GET_SIZE(new_items) and \ + pos < python.PyList_GET_SIZE(del_items): + replace(del_items[pos], new_items[pos]) + pos += 1 + # remove leftover items + if pos < python.PyList_GET_SIZE(del_items): + remove = parent.remove + while pos < python.PyList_GET_SIZE(del_items): + remove(del_items[pos]) + pos += 1 + # append remaining new items + if pos < 
python.PyList_GET_SIZE(new_items): + # the sanity check above guarantees (step == 1) + if pos > 0: + item = new_items[pos-1] + else: + if (slice).start > 0: + c_node = parent._c_node.children + else: + c_node = parent._c_node.last + c_node = _findFollowingSibling( + c_node, tree._getNs(target._c_node), target._c_node.name, + (slice).start - 1) + if c_node is NULL: + while pos < python.PyList_GET_SIZE(new_items): + cetree.appendChild(parent, new_items[pos]) + pos += 1 + return + item = cetree.elementFactory(parent._doc, c_node) + while pos < python.PyList_GET_SIZE(new_items): + add = item.addnext + item = new_items[pos] + add(item) + pos += 1 + ################################################################################ # Data type support in subclasses @@ -815,7 +869,8 @@ # Python type registry cdef class PyType: - """User defined type. + """PyType(self, name, type_check, type_class, stringify=None) + User defined type. Named type that contains a type check function and a type class that inherits from ObjectifiedDataElement. The type check must take a string @@ -824,6 +879,7 @@ guessing. Example:: + PyType('int', int, MyIntClass).register() Note that the order in which types are registered matters. The first @@ -856,7 +912,9 @@ return "PyType(%s, %s)" % (self.name, self._type.__name__) def register(self, before=None, after=None): - """Register the type. + """register(self, before=None, after=None) + + Register the type. The additional keyword arguments 'before' and 'after' accept a sequence of type names that must appear before/after the new type in @@ -895,6 +953,7 @@ _SCHEMA_TYPE_DICT[xs_type] = self def unregister(self): + "unregister(self)" if _PYTYPE_DICT.get(self.name) is self: del _PYTYPE_DICT[self.name] for xs_type, pytype in _SCHEMA_TYPE_DICT.items(): @@ -951,7 +1010,9 @@ return _typename(obj) def pytypename(obj): - """Find the name of the corresponding PyType for a Python object. 
+ """pytypename(obj) + + Find the name of the corresponding PyType for a Python object. """ return _pytypename(obj) @@ -997,7 +1058,9 @@ _registerPyTypes() def getRegisteredTypes(): - """Returns a list of the currently registered PyType objects. + """getRegisteredTypes() + + Returns a list of the currently registered PyType objects. To add a new type, retrieve this list and call unregister() for all entries. Then add the new type at a suitable position (possibly replacing @@ -1061,6 +1124,8 @@ cdef _ObjectifyElementMakerCaller NEW_ELEMENT_MAKER "PY_NEW" (object t) cdef class ElementMaker: + """ElementMaker(self, namespace=None, nsmap=None, annotate=True, makeelement=None) + """ cdef object _makeelement cdef object _namespace cdef object _nsmap @@ -1099,6 +1164,7 @@ cdef bint _annotate def __call__(self, *children, **attrib): + "__call__(self, *children, **attrib)" cdef _ObjectifyElementMakerCaller elementMaker cdef python.PyObject* pytype cdef _Element element @@ -1176,14 +1242,18 @@ __RECURSIVE_STR = 0 # default: off def enableRecursiveStr(on=True): - """Enable a recursively generated tree representation for str(element), + """enableRecursiveStr(on=True) + + Enable a recursively generated tree representation for str(element), based on objectify.dump(element). """ global __RECURSIVE_STR __RECURSIVE_STR = on def dump(_Element element not None): - """Return a recursively generated string representation of an element. + """dump(_Element element not None) + + Return a recursively generated string representation of an element. """ return _dump(element, 0) @@ -1230,6 +1300,7 @@ copy_reg.pickle(ObjectifiedElement, reduceFunction, fromstring) def pickleReduce(obj): + "pickleReduce(obj)" return (fromstring, (etree.tostring(obj),)) _setupPickle(pickleReduce) @@ -1239,7 +1310,8 @@ # Element class lookup cdef class ObjectifyElementClassLookup(ElementClassLookup): - """Element class lookup method that uses the objectify classes. 
+ """ObjectifyElementClassLookup(self, tree_class=None, empty_data_class=None) + Element class lookup method that uses the objectify classes. """ cdef object empty_data_class cdef object tree_class @@ -1325,7 +1397,9 @@ def pyannotate(element_or_tree, *, ignore_old=False, ignore_xsi=False, empty_pytype=None): - """Recursively annotates the elements of an XML tree with 'pytype' + """pyannotate(element_or_tree, ignore_old=False, ignore_xsi=False, empty_pytype=None) + + Recursively annotates the elements of an XML tree with 'pytype' attributes. If the 'ignore_old' keyword argument is True (the default), current 'pytype' @@ -1346,7 +1420,9 @@ def xsiannotate(element_or_tree, *, ignore_old=False, ignore_pytype=False, empty_type=None): - """Recursively annotates the elements of an XML tree with 'xsi:type' + """xsiannotate(element_or_tree, ignore_old=False, ignore_pytype=False, empty_type=None) + + Recursively annotates the elements of an XML tree with 'xsi:type' attributes. If the 'ignore_old' keyword argument is True (the default), current @@ -1373,7 +1449,9 @@ def annotate(element_or_tree, *, ignore_old=True, ignore_xsi=False, empty_pytype=None, empty_type=None, annotate_xsi=0, annotate_pytype=1): - """Recursively annotates the elements of an XML tree with 'xsi:type' + """annotate(element_or_tree, ignore_old=True, ignore_xsi=False, empty_pytype=None, empty_type=None, annotate_xsi=0, annotate_pytype=1) + + Recursively annotates the elements of an XML tree with 'xsi:type' and/or 'py:pytype' attributes. If the 'ignore_old' keyword argument is True (the default), current @@ -1559,7 +1637,9 @@ tree.END_FOR_EACH_ELEMENT_FROM(c_node) def deannotate(element_or_tree, *, pytype=True, xsi=True): - """Recursively de-annotate the elements of an XML tree by removing 'pytype' + """deannotate(element_or_tree, pytype=True, xsi=True) + + Recursively de-annotate the elements of an XML tree by removing 'pytype' and/or 'type' attributes. 
If the 'pytype' keyword argument is True (the default), 'pytype' attributes @@ -1604,11 +1684,13 @@ objectify_parser = __DEFAULT_PARSER def setDefaultParser(new_parser = None): - "This function is deprecated, use ``set_default_parser()`` instead." + ":deprecated: use ``set_default_parser()`` instead." set_default_parser(new_parser) def set_default_parser(new_parser = None): - """Replace the default parser used by objectify's Element() and + """set_default_parser(new_parser = None) + + Replace the default parser used by objectify's Element() and fromstring() functions. The new parser must be an etree.XMLParser. @@ -1624,7 +1706,9 @@ raise TypeError("parser must inherit from lxml.etree.XMLParser") def makeparser(**kw): - """Create a new XML parser for objectify trees. + """makeparser(remove_blank_text=True, **kw) + + Create a new XML parser for objectify trees. You can pass all keyword arguments that are supported by ``etree.XMLParser()``. Note that this parser defaults to removing @@ -1647,7 +1731,9 @@ _fromstring = etree.fromstring def fromstring(xml, parser=None): - """Objectify specific version of the lxml.etree fromstring() function + """fromstring(xml, parser=None) + + Objectify specific version of the lxml.etree fromstring() function that uses the objectify parser. You can pass a different parser as second argument. @@ -1662,7 +1748,9 @@ _parse = etree.parse def parse(f, parser=None): - """Parse a file or file-like object with the objectify parser. + """parse(f, parser=None) + + Parse a file or file-like object with the objectify parser. You can pass a different parser as second argument. 
""" @@ -1678,7 +1766,9 @@ E = ElementMaker() def Element(_tag, attrib=None, nsmap=None, *, _pytype=None, **_attributes): - """Objectify specific version of the lxml.etree Element() factory that + """Element(_tag, attrib=None, nsmap=None, _pytype=None, **_attributes) + + Objectify specific version of the lxml.etree Element() factory that always creates a structural (tree) element. NOTE: requires parser based element class lookup activated in lxml.etree! @@ -1696,7 +1786,9 @@ def DataElement(_value, attrib=None, nsmap=None, *, _pytype=None, _xsi=None, **_attributes): - """Create a new element from a Python value and XML attributes taken from + """DataElement(_value, attrib=None, nsmap=None, _pytype=None, _xsi=None, **_attributes) + + Create a new element from a Python value and XML attributes taken from keyword arguments or a dictionary passed as second argument. Automatically adds a 'pytype' attribute for the Python type of the value, Modified: lxml/branch/lxml-2.0/src/lxml/lxml.pyclasslookup.pyx ============================================================================== --- lxml/branch/lxml-2.0/src/lxml/lxml.pyclasslookup.pyx (original) +++ lxml/branch/lxml-2.0/src/lxml/lxml.pyclasslookup.pyx Thu Feb 14 15:52:28 2008 @@ -1,3 +1,39 @@ +""" +A whole-tree Element class lookup scheme for `lxml.etree`. + +This class lookup scheme allows access to the entire XML tree in +read-only mode. To use it, let a class inherit from +`PythonElementClassLookup` and re-implement the ``lookup(self, doc, +root)`` method: + + >>> from lxml import etree, pyclasslookup + >>> + >>> class MyElementClass(etree.ElementBase): + ... honkey = True + ... + >>> class MyLookup(pyclasslookup.PythonElementClassLookup): + ... def lookup(self, doc, root): + ... if root.tag == "sometag": + ... return MyElementClass + ... else: + ... for child in root: + ... if child.tag == "someothertag": + ... return MyElementClass + ... # delegate to default + ... 
return None + +Note that the API of the Element objects is not complete. It is +purely read-only and does not support all features of the normal +`lxml.etree` API (such as XPath, extended slicing or some iteration +methods). + +Also, you cannot wrap such a read-only Element in an ElementTree, and +you must take care not to keep a reference to them outside of the +`lookup()` method. + +See http://codespeak.net/lxml/element_classes.html +""" + from etreepublic cimport _Document, _Element, ElementBase from etreepublic cimport ElementClassLookup, FallbackElementClassLookup from etreepublic cimport elementFactory, import_lxml__etree @@ -17,6 +53,7 @@ __version__ = etree.__version__ cdef class _ElementProxy: + "The main read-only Element proxy class (for internal use only!)." cdef tree.xmlNode* _c_node cdef object _source_proxy cdef object _dependent_proxies @@ -128,6 +165,21 @@ c_node = cetree.findChildBackwards(self._c_node, 0) return c_node != NULL + def __iter__(self): + return iter(self.getchildren()) + + def iterchildren(self, tag=None, *, reversed=False): + """iterchildren(self, tag=None, reversed=False) + + Iterate over the children of this element. + """ + children = self.getchildren() + if tag is not None: + children = [ el for el in children if el.tag == tag ] + if reversed: + children = children[::-1] + return iter(children) + def get(self, key, default=None): """Gets an element attribute. """ @@ -155,7 +207,7 @@ self._assertNode() return cetree.collectAttributes(self._c_node, 3) - def getchildren(self): + cpdef getchildren(self): """Returns all subelements. The elements are returned in document order. 
""" @@ -201,15 +253,21 @@ return _newProxy(self._source_proxy, c_node) return None + +cdef extern from "etree_defs.h": + # macro call to 't->tp_new()' for fast instantiation + cdef _ElementProxy NEW_PROXY "PY_NEW" (object t) + cdef _ElementProxy _newProxy(_ElementProxy sourceProxy, tree.xmlNode* c_node): cdef _ElementProxy el - el = _ElementProxy() + el = NEW_PROXY(_ElementProxy) el._c_node = c_node if sourceProxy is None: - sourceProxy = el - el._dependent_proxies = [] - el._source_proxy = sourceProxy - python.PyList_Append(sourceProxy._dependent_proxies, el) + el._source_proxy = el + el._dependent_proxies = [el] + else: + el._source_proxy = sourceProxy + python.PyList_Append(sourceProxy._dependent_proxies, el) return el cdef _freeProxies(_ElementProxy sourceProxy): @@ -238,7 +296,8 @@ cdef class PythonElementClassLookup(FallbackElementClassLookup): - """Element class lookup based on a subclass method. + """PythonElementClassLookup(self, fallback=None) + Element class lookup based on a subclass method. To use it, inherit from this class and override the lookup method to lookup the element class for a node:: @@ -257,6 +316,10 @@ self._lookup_function = _lookup_class def lookup(self, doc, element): + """lookup(self, doc, element) + + Override this method to implement your own lookup scheme. + """ return None cdef object _lookup_class(state, _Document doc, tree.xmlNode* c_node): Modified: lxml/branch/lxml-2.0/src/lxml/nsclasses.pxi ============================================================================== --- lxml/branch/lxml-2.0/src/lxml/nsclasses.pxi (original) +++ lxml/branch/lxml-2.0/src/lxml/nsclasses.pxi Thu Feb 14 15:52:28 2008 @@ -28,10 +28,14 @@ self._entries = {} def update(self, class_dict_iterable): - """Forgivingly update the registry. If registered values do not match - the required type for this registry, or if their name starts with '_', - they will be silently discarded. 
This allows registrations at the - module or class level using vars(), globals() etc.""" + """update(self, class_dict_iterable) + + Forgivingly update the registry. + + If registered values do not match the required type for this + registry, or if their name starts with '_', they will be + silently discarded. This allows registrations at the module or + class level using vars(), globals() etc.""" if hasattr(class_dict_iterable, 'items'): class_dict_iterable = class_dict_iterable.items() for name, item in class_dict_iterable: @@ -89,7 +93,9 @@ cdef class ElementNamespaceClassLookup(FallbackElementClassLookup): - """Element class lookup scheme that searches the Element class in the + """ElementNamespaceClassLookup(self, fallback=None) + + Element class lookup scheme that searches the Element class in the Namespace registry. """ cdef object _namespace_registries @@ -99,8 +105,11 @@ self._lookup_function = _find_nselement_class def get_namespace(self, ns_uri): - """Retrieve the namespace object associated with the given URI. Creates a - new one if it does not yet exist.""" + """get_namespace(self, ns_uri) + + Retrieve the namespace object associated with the given URI. + + Creates a new one if it does not yet exist.""" if ns_uri: ns_utf = _utf8(ns_uri) else: @@ -156,9 +165,13 @@ __FUNCTION_NAMESPACE_REGISTRIES = {} def FunctionNamespace(ns_uri): - """Retrieve the function namespace object associated with the given - URI. Creates a new one if it does not yet exist. A function namespace can - only be used to register extension functions.""" + """FunctionNamespace(ns_uri) + + Retrieve the function namespace object associated with the given + URI. + + Creates a new one if it does not yet exist. 
A function namespace + can only be used to register extension functions.""" if ns_uri: ns_utf = _utf8(ns_uri) else: Modified: lxml/branch/lxml-2.0/src/lxml/objectpath.pxi ============================================================================== --- lxml/branch/lxml-2.0/src/lxml/objectpath.pxi (original) +++ lxml/branch/lxml-2.0/src/lxml/objectpath.pxi Thu Feb 14 15:52:28 2008 @@ -8,7 +8,8 @@ cdef class ObjectPath: - """Immutable object that represents a compiled object path. + """ObjectPath(path) + Immutable object that represents a compiled object path. Example for a path: 'root.child[1].{other}child[25]' """ @@ -54,6 +55,7 @@ default, use_default) def hasattr(self, _Element root not None): + "hasattr(self, root)" try: _findObjectPath(root, self._c_path, self._path_len, None, 0) except AttributeError: @@ -61,14 +63,18 @@ return True def setattr(self, _Element root not None, value): - """Set the value of the target element in a subtree. + """setattr(self, root, value) + + Set the value of the target element in a subtree. If any of the children on the path does not exist, it is created. """ _createObjectPath(root, self._c_path, self._path_len, 1, value) def addattr(self, _Element root not None, value): - """Append a value to the target element in a subtree. + """addattr(self, root, value) + + Append a value to the target element in a subtree. If any of the children on the path does not exist, it is created. """ Modified: lxml/branch/lxml-2.0/src/lxml/parser.pxi ============================================================================== --- lxml/branch/lxml-2.0/src/lxml/parser.pxi (original) +++ lxml/branch/lxml-2.0/src/lxml/parser.pxi Thu Feb 14 15:52:28 2008 @@ -668,6 +668,7 @@ return context._error_log.copy() property resolvers: + "The custom resolver registry of this parser." 
def __get__(self): return self._resolvers @@ -681,7 +682,9 @@ self.set_element_class_lookup(lookup) def set_element_class_lookup(self, ElementClassLookup lookup = None): - """Set a lookup scheme for element classes generated from this parser. + """set_element_class_lookup(self, lookup = None) + + Set a lookup scheme for element classes generated from this parser. Reset it by passing None or nothing. """ @@ -702,11 +705,16 @@ return parser def copy(self): - "Create a new parser with the same configuration." + """copy(self) + + Create a new parser with the same configuration. + """ return self._copy() def makeelement(self, _tag, attrib=None, nsmap=None, **_extra): - """Creates a new element associated with this parser. + """makeelement(self, _tag, attrib=None, nsmap=None, **_extra) + + Creates a new element associated with this parser. """ return _makeElement(_tag, NULL, None, self, None, None, attrib, nsmap, _extra) @@ -861,7 +869,9 @@ return context._error_log.copy() def feed(self, data): - """Feeds data to the parser. The argument should be an 8-bit string + """feed(self, data) + + Feeds data to the parser. The argument should be an 8-bit string buffer containing encoded data, although Unicode is supported as long as both string types are not mixed. @@ -942,7 +952,9 @@ context.cleanup() def close(self): - """Terminates feeding data to this parser. This tells the parser to + """close(self) + + Terminates feeding data to this parser. This tells the parser to process any remaining data in the feed buffer, and then returns the root Element of the tree that was parsed. @@ -1303,7 +1315,8 @@ ############################################################ cdef class TreeBuilder(_SaxParserTarget): - """Parser target that builds a tree. + """TreeBuilder(self, element_factory=None, parser=None) + Parser target that builds a tree. The final tree is returned by the ``close()`` method. 
""" @@ -1343,7 +1356,9 @@ # Python level event handlers def close(self): - """Flushes the builder buffers, and returns the toplevel document + """close(self) + + Flushes the builder buffers, and returns the toplevel document element. """ assert python.PyList_GET_SIZE(self._element_stack) == 0, "missing end tags" @@ -1351,19 +1366,27 @@ return self._last def data(self, data): - """Adds text to the current element. The value should be either an + """data(self, data) + + Adds text to the current element. The value should be either an 8-bit string containing ASCII text, or a Unicode string. """ self._handleSaxData(data) def start(self, tag, attrs, nsmap=None): - "Opens a new element." + """start(self, tag, attrs, nsmap=None) + + Opens a new element. + """ if nsmap is None: nsmap = EMPTY_READ_ONLY_DICT return self._handleSaxStart(tag, attrs, nsmap) def end(self, tag): - "Closes the current element." + """end(self, tag) + + Closes the current element. + """ element = self._handleSaxEnd(tag) assert self._last.tag == tag,\ "end tag mismatch (expected %s, got %s)" % ( @@ -1371,9 +1394,13 @@ return element def pi(self, target, data): + """pi(self, target, data) + """ return self._handleSaxPi(target, data) def comment(self, comment): + """comment(self, comment) + """ return self._handleSaxComment(comment) # internal SAX event handlers @@ -1432,33 +1459,38 @@ ) cdef class XMLParser(_FeedParser): - """The XML parser. Parsers can be supplied as additional argument to - various parse functions of the lxml API. A default parser is always - available and can be replaced by a call to the global function - 'set_default_parser'. New parsers can be created at any time without a - major run-time overhead. 
+ """XMLParser(self, attribute_defaults=False, dtd_validation=False, load_dtd=False, no_network=True, ns_clean=False, recover=False, remove_blank_text=False, compact=True, resolve_entities=True, remove_comments=False, remove_pis=False, target=None, encoding=None, schema=None) + The XML parser. + + Parsers can be supplied as additional argument to various parse + functions of the lxml API. A default parser is always available + and can be replaced by a call to the global function + 'set_default_parser'. New parsers can be created at any time + without a major run-time overhead. The keyword arguments in the constructor are mainly based on the libxml2 parser configuration. A DTD will also be loaded if validation or attribute default values are requested. Available boolean keyword arguments: - * attribute_defaults - read default attributes from DTD - * dtd_validation - validate (if DTD is available) - * load_dtd - use DTD for parsing - * no_network - prevent network access for related files (default: True) - * ns_clean - clean up redundant namespace declarations - * recover - try hard to parse through broken XML - * remove_blank_text - discard blank text nodes - * remove_comments - discard comments - * remove_pis - discard processing instructions - * compact - safe memory for short text content (default: True) - * resolve_entities - replace entities by their text value (default: True) + + - attribute_defaults - read default attributes from DTD + - dtd_validation - validate (if DTD is available) + - load_dtd - use DTD for parsing + - no_network - prevent network access for related files (default: True) + - ns_clean - clean up redundant namespace declarations + - recover - try hard to parse through broken XML + - remove_blank_text - discard blank text nodes + - remove_comments - discard comments + - remove_pis - discard processing instructions + - compact - safe memory for short text content (default: True) + - resolve_entities - replace entities by their text value 
(default: True) Other keyword arguments: - * encoding - override the document encoding - * target - a parser target object that will receive the parse events - * schema - an XMLSchema to validate against + + - encoding - override the document encoding + - target - a parser target object that will receive the parse events + - schema - an XMLSchema to validate against Note that you should avoid sharing parsers between threads. While this is not harmful, it is more efficient to use separate parsers. This does not @@ -1498,8 +1530,10 @@ target, None, encoding) cdef class ETCompatXMLParser(XMLParser): - """An XML parser with an ElementTree compatible default setup. See the - XMLParser class for details. + """ETCompatXMLParser(self, attribute_defaults=False, dtd_validation=False, load_dtd=False, no_network=True, ns_clean=False, recover=False, remove_blank_text=False, compact=True, resolve_entities=True, remove_comments=True, remove_pis=True, target=None, encoding=None, schema=None) + An XML parser with an ElementTree compatible default setup. + + See the XMLParser class for details. This parser has ``remove_comments`` and ``remove_pis`` enabled by default and thus ignores comments and processing instructions. @@ -1532,15 +1566,17 @@ __GLOBAL_PARSER_CONTEXT.setDefaultParser(__DEFAULT_XML_PARSER) def setDefaultParser(parser=None): - "@deprecated: please use set_default_parser instead." + ":deprecated: please use set_default_parser instead." set_default_parser(parser) def getDefaultParser(): - "@deprecated: please use get_default_parser instead." + ":deprecated: please use get_default_parser instead." return get_default_parser() def set_default_parser(_BaseParser parser=None): - """Set a default parser for the current thread. This parser is used + """set_default_parser(parser=None) + + Set a default parser for the current thread. This parser is used globally whenever no parser is supplied to the various parse functions of the lxml API. 
If this function is called without a parser (or if it is None), the default parser is reset to the original configuration. @@ -1554,6 +1590,7 @@ __GLOBAL_PARSER_CONTEXT.setDefaultParser(parser) def get_default_parser(): + "get_default_parser()" return __GLOBAL_PARSER_CONTEXT.getDefaultParser() ############################################################ @@ -1568,22 +1605,28 @@ ) cdef class HTMLParser(_FeedParser): - """The HTML parser. This parser allows reading HTML into a normal XML - tree. By default, it can read broken (non well-formed) HTML, depending on - the capabilities of libxml2. Use the 'recover' option to switch this off. + """HTMLParser(self, recover=True, no_network=True, remove_blank_text=False, compact=True, remove_comments=False, remove_pis=False, target=None, encoding=None, schema=None) + The HTML parser. + + This parser allows reading HTML into a normal XML tree. By + default, it can read broken (non well-formed) HTML, depending on + the capabilities of libxml2. Use the 'recover' option to switch + this off. 
Available boolean keyword arguments: - * recover - try hard to parse through broken HTML (default: True) - * no_network - prevent network access for related files (default: True) - * remove_blank_text - discard empty text nodes - * remove_comments - discard comments - * remove_pis - discard processing instructions - * compact - safe memory for short text content (default: True) + + - recover - try hard to parse through broken HTML (default: True) + - no_network - prevent network access for related files (default: True) + - remove_blank_text - discard empty text nodes + - remove_comments - discard comments + - remove_pis - discard processing instructions + - compact - safe memory for short text content (default: True) Other keyword arguments: - * encoding - override the document encoding - * target - a parser target object that will receive the parse events - * schema - an XMLSchema to validate against + + - encoding - override the document encoding + - target - a parser target object that will receive the parse events + - schema - an XMLSchema to validate against Note that you should avoid sharing parsers between threads for performance reasons. Modified: lxml/branch/lxml-2.0/src/lxml/relaxng.pxi ============================================================================== --- lxml/branch/lxml-2.0/src/lxml/relaxng.pxi (original) +++ lxml/branch/lxml-2.0/src/lxml/relaxng.pxi Thu Feb 14 15:52:28 2008 @@ -20,7 +20,8 @@ # RelaxNG cdef class RelaxNG(_Validator): - """Turn a document into a Relax NG validator. + """RelaxNG(self, etree=None, file=None) + Turn a document into a Relax NG validator. Either pass a schema as Element or ElementTree, or pass a file or filename through the ``file`` keyword argument. @@ -91,7 +92,9 @@ relaxng.xmlRelaxNGFree(self._c_schema) def __call__(self, etree): - """Validate doc using Relax NG. + """__call__(self, etree) + + Validate doc using Relax NG. 
Returns true if document is valid, false if not.""" cdef _Document doc Modified: lxml/branch/lxml-2.0/src/lxml/sax.py ============================================================================== --- lxml/branch/lxml-2.0/src/lxml/sax.py (original) +++ lxml/branch/lxml-2.0/src/lxml/sax.py Thu Feb 14 15:52:28 2008 @@ -1,3 +1,15 @@ +""" +SAX-based adapter to copy trees from/to the Python standard library. + +Use the `ElementTreeContentHandler` class to build an ElementTree from +SAX events. + +Use the `ElementTreeProducer` class or the `saxify()` function to fire +the SAX events of an ElementTree against a SAX ContentHandler. + +See http://codespeak.net/lxml/sax.html +""" + from xml.sax.handler import ContentHandler import etree from etree import ElementTree, SubElement @@ -220,4 +232,7 @@ return prefix + ':' + local_name def saxify(element_or_tree, content_handler): + """One-shot helper to generate SAX events from an XML tree and fire + them against a SAX ContentHandler. + """ return ElementTreeProducer(element_or_tree, content_handler).saxify() Modified: lxml/branch/lxml-2.0/src/lxml/schematron.pxi ============================================================================== --- lxml/branch/lxml-2.0/src/lxml/schematron.pxi (original) +++ lxml/branch/lxml-2.0/src/lxml/schematron.pxi Thu Feb 14 15:52:28 2008 @@ -66,14 +66,15 @@ # Schematron cdef class Schematron(_Validator): - """A Schematron validator. + """Schematron(self, etree=None, file=None) + A Schematron validator. Pass a root Element or an ElementTree to turn it into a validator. Alternatively, pass a filename as keyword argument 'file' to parse from the file system. """ cdef schematron.xmlSchematron* _c_schema - def __init__(self, etree=None, file=None): + def __init__(self, etree=None, *, file=None): cdef _Document doc cdef _Element root_node cdef xmlNode* c_node @@ -120,7 +121,9 @@ schematron.xmlSchematronFree(self._c_schema) def __call__(self, etree): - """Validate doc using Schematron. 
+ """__call__(self, etree) + + Validate doc using Schematron. Returns true if document is valid, false if not.""" cdef _Document doc Modified: lxml/branch/lxml-2.0/src/lxml/tests/__init__.py ============================================================================== --- lxml/branch/lxml-2.0/src/lxml/tests/__init__.py (original) +++ lxml/branch/lxml-2.0/src/lxml/tests/__init__.py Thu Feb 14 15:52:28 2008 @@ -1,2 +1,4 @@ -# this is a package +""" +The lxml test suite for lxml, ElementTree and cElementTree. +""" Modified: lxml/branch/lxml-2.0/src/lxml/tests/common_imports.py ============================================================================== --- lxml/branch/lxml-2.0/src/lxml/tests/common_imports.py (original) +++ lxml/branch/lxml-2.0/src/lxml/tests/common_imports.py Thu Feb 14 15:52:28 2008 @@ -5,6 +5,15 @@ from lxml import etree +def make_version_tuple(version_string): + l = [] + for part in re.findall('([0-9]+|[^0-9.]+)', version_string): + try: + l.append(int(part)) + except ValueError: + l.append(part) + return tuple(l) + try: from elementtree import ElementTree # standard ET except ImportError: @@ -14,7 +23,7 @@ ElementTree = None if hasattr(ElementTree, 'VERSION'): - if tuple(ElementTree.VERSION.split('.')) < (1,3): + if make_version_tuple(ElementTree.VERSION)[:2] < (1,3): # compatibility tests require ET 1.3+ ElementTree = None @@ -27,8 +36,8 @@ cElementTree = None if hasattr(cElementTree, 'VERSION'): - if tuple(cElementTree.VERSION.split('.')) < (1,0,7): - # compatibility tests require cET 1.0.7+ + if make_version_tuple(cElementTree.VERSION)[:2] <= (1,0): + # compatibility tests do not run with cET 1.0.7 cElementTree = None try: Modified: lxml/branch/lxml-2.0/src/lxml/tests/test_elementtree.py ============================================================================== --- lxml/branch/lxml-2.0/src/lxml/tests/test_elementtree.py (original) +++ lxml/branch/lxml-2.0/src/lxml/tests/test_elementtree.py Thu Feb 14 15:52:28 2008 @@ -16,7 +16,7 @@ if 
cElementTree is not None: if tuple([int(n) for n in - getattr(cElementTree, "VERSION", "0.0").split(".")]) <= (1,0,6): + getattr(cElementTree, "VERSION", "0.0").split(".")]) <= (1,0,7): cElementTree = None try: @@ -2006,6 +2006,50 @@ [d, c, b], list(a)) + def test_setslice_all_replace_reversed_ns1(self): + Element = self.etree.Element + SubElement = self.etree.SubElement + + a = Element('{ns}a') + b = SubElement(a, '{ns}b', {'{ns1}a1': 'test'}) + c = SubElement(a, '{ns}c', {'{ns2}a2': 'test'}) + d = SubElement(a, '{ns}d', {'{ns3}a3': 'test'}) + + s = [d, c, b] + a[:] = s + self.assertEquals( + [d, c, b], + list(a)) + self.assertEquals( + ['{ns}d', '{ns}c', '{ns}b'], + [ child.tag for child in a ]) + + self.assertEquals( + [['{ns3}a3'], ['{ns2}a2'], ['{ns1}a1']], + [ child.attrib.keys() for child in a ]) + + def test_setslice_all_replace_reversed_ns2(self): + Element = self.etree.Element + SubElement = self.etree.SubElement + + a = Element('{ns}a') + b = SubElement(a, '{ns1}b', {'{ns}a1': 'test'}) + c = SubElement(a, '{ns2}c', {'{ns}a2': 'test'}) + d = SubElement(a, '{ns3}d', {'{ns}a3': 'test'}) + + s = [d, c, b] + a[:] = s + self.assertEquals( + [d, c, b], + list(a)) + self.assertEquals( + ['{ns3}d', '{ns2}c', '{ns1}b'], + [ child.tag for child in a ]) + + self.assertEquals( + [['{ns}a3'], ['{ns}a2'], ['{ns}a1']], + [ child.attrib.keys() for child in a ]) + def test_setslice_end(self): Element = self.etree.Element SubElement = self.etree.SubElement Modified: lxml/branch/lxml-2.0/src/lxml/tests/test_objectify.py ============================================================================== --- lxml/branch/lxml-2.0/src/lxml/tests/test_objectify.py (original) +++ lxml/branch/lxml-2.0/src/lxml/tests/test_objectify.py Thu Feb 14 15:52:28 2008 @@ -427,9 +427,61 @@ self.assertRaises(TypeError, setattr, root.c1.c2, 'text', "test") self.assertRaises(TypeError, setattr, root.c1.c2, 'pyval', "test") - def test_setslice(self): + # slicing + + def test_getslice_complete(self): 
+ root = self.XML("c1c2") + self.assertEquals(["c1", "c2"], + [ c.text for c in root.c[:] ]) + + def test_getslice_partial(self): + root = self.XML("c1c2c3c4") + test_list = ["c1", "c2", "c3", "c4"] + + self.assertEquals(test_list, + [ c.text for c in root.c[:] ]) + self.assertEquals(test_list[1:2], + [ c.text for c in root.c[1:2] ]) + self.assertEquals(test_list[-3:-1], + [ c.text for c in root.c[-3:-1] ]) + self.assertEquals(test_list[-3:3], + [ c.text for c in root.c[-3:3] ]) + self.assertEquals(test_list[-3000:3], + [ c.text for c in root.c[-3000:3] ]) + self.assertEquals(test_list[-3:3000], + [ c.text for c in root.c[-3:3000] ]) + + def test_getslice_partial_neg(self): + root = self.XML("c1c2c3c4") + test_list = ["c1", "c2", "c3", "c4"] + + self.assertEquals(test_list, + [ c.text for c in root.c[:] ]) + self.assertEquals(test_list[2:1:-1], + [ c.text for c in root.c[2:1:-1] ]) + self.assertEquals(test_list[-1:-3:-1], + [ c.text for c in root.c[-1:-3:-1] ]) + self.assertEquals(test_list[2:-3:-1], + [ c.text for c in root.c[2:-3:-1] ]) + self.assertEquals(test_list[2:-3000:-1], + [ c.text for c in root.c[2:-3000:-1] ]) + + # slice assignment + + def test_setslice_complete(self): + Element = self.Element + root = Element("root") + root.c = ["c1", "c2"] + + c1 = root.c[0] + c2 = root.c[1] + + self.assertEquals([c1,c2], list(root.c)) + self.assertEquals(["c1", "c2"], + [ c.text for c in root.c ]) + + def test_setslice_elements(self): Element = self.Element - SubElement = self.etree.SubElement root = Element("root") root.c = ["c1", "c2"] @@ -455,10 +507,143 @@ self.assertEquals(["c1", "c2", "c2", "c1"], [ c.text for c in root.c ]) + def test_setslice_partial(self): + Element = self.Element + root = Element("root") + l = ["c1", "c2", "c3", "c4"] + root.c = l + + self.assertEquals(["c1", "c2", "c3", "c4"], + [ c.text for c in root.c ]) + self.assertEquals(l, + [ c.text for c in root.c ]) + + new_slice = ["cA", "cB"] + l[1:2] = new_slice + root.c[1:2] = new_slice + + 
self.assertEquals(["c1", "cA", "cB", "c3", "c4"], l) + self.assertEquals(["c1", "cA", "cB", "c3", "c4"], + [ c.text for c in root.c ]) + self.assertEquals(l, + [ c.text for c in root.c ]) + + def test_setslice_insert(self): + Element = self.Element + root = Element("root") + l = ["c1", "c2", "c3", "c4"] + root.c = l + + self.assertEquals(["c1", "c2", "c3", "c4"], + [ c.text for c in root.c ]) + self.assertEquals(l, + [ c.text for c in root.c ]) + + new_slice = ["cA", "cB"] + l[1:1] = new_slice + root.c[1:1] = new_slice + + self.assertEquals(["c1", "cA", "cB", "c2", "c3", "c4"], l) + self.assertEquals(["c1", "cA", "cB", "c2", "c3", "c4"], + [ c.text for c in root.c ]) + self.assertEquals(l, + [ c.text for c in root.c ]) + + def test_setslice_insert_neg(self): + Element = self.Element + root = Element("root") + l = ["c1", "c2", "c3", "c4"] + root.c = l + + self.assertEquals(["c1", "c2", "c3", "c4"], + [ c.text for c in root.c ]) + self.assertEquals(l, + [ c.text for c in root.c ]) + + new_slice = ["cA", "cB"] + l[-2:-2] = new_slice + root.c[-2:-2] = new_slice + + self.assertEquals(["c1", "c2", "cA", "cB", "c3", "c4"], l) + self.assertEquals(["c1", "c2", "cA", "cB", "c3", "c4"], + [ c.text for c in root.c ]) + self.assertEquals(l, + [ c.text for c in root.c ]) + + def test_setslice_empty(self): + Element = self.Element + root = Element("root") + + root.c = [] + self.assertRaises( + AttributeError, getattr, root, 'c') + + def test_setslice_partial_wrong_length(self): + Element = self.Element + root = Element("root") + l = ["c1", "c2", "c3", "c4"] + root.c = l + + self.assertEquals(["c1", "c2", "c3", "c4"], + [ c.text for c in root.c ]) + self.assertEquals(l, + [ c.text for c in root.c ]) + + new_slice = ["cA", "cB", "cC"] + self.assertRaises( + ValueError, operator.setitem, + l, slice(1,2,-1), new_slice) + self.assertRaises( + ValueError, operator.setitem, + root.c, slice(1,2,-1), new_slice) + + def test_setslice_partial_neg(self): + Element = self.Element + root = 
Element("root") + l = ["c1", "c2", "c3", "c4"] + root.c = l + + self.assertEquals(["c1", "c2", "c3", "c4"], + [ c.text for c in root.c ]) + self.assertEquals(l, + [ c.text for c in root.c ]) + + new_slice = ["cA", "cB"] + l[-1:1:-1] = new_slice + root.c[-1:1:-1] = new_slice + + self.assertEquals(["c1", "c2", "cB", "cA"], l) + self.assertEquals(["c1", "c2", "cB", "cA"], + [ c.text for c in root.c ]) + self.assertEquals(l, + [ c.text for c in root.c ]) + + def test_setslice_partial_allneg(self): + Element = self.Element + root = Element("root") + l = ["c1", "c2", "c3", "c4"] + root.c = l + + self.assertEquals(["c1", "c2", "c3", "c4"], + [ c.text for c in root.c ]) + self.assertEquals(l, + [ c.text for c in root.c ]) + + new_slice = ["cA", "cB"] + l[-1:-4:-2] = new_slice + root.c[-1:-4:-2] = new_slice + + self.assertEquals(["c1", "cB", "c3", "cA"], l) + self.assertEquals(["c1", "cB", "c3", "cA"], + [ c.text for c in root.c ]) + self.assertEquals(l, + [ c.text for c in root.c ]) + + # other stuff + def test_set_string(self): # make sure strings are not handled as sequences Element = self.Element - SubElement = self.etree.SubElement root = Element("root") root.c = "TEST" self.assertEquals(["TEST"], @@ -467,7 +652,6 @@ def test_setitem_string(self): # make sure strings are set as children Element = self.Element - SubElement = self.etree.SubElement root = Element("root") root["c"] = "TEST" self.assertEquals(["TEST"], @@ -476,7 +660,6 @@ def test_setitem_string_special(self): # make sure 'text' etc. 
are set as children Element = self.Element - SubElement = self.etree.SubElement root = Element("root") root["text"] = "TEST" Modified: lxml/branch/lxml-2.0/src/lxml/tests/test_pyclasslookup.py ============================================================================== --- lxml/branch/lxml-2.0/src/lxml/tests/test_pyclasslookup.py (original) +++ lxml/branch/lxml-2.0/src/lxml/tests/test_pyclasslookup.py Thu Feb 14 15:52:28 2008 @@ -245,6 +245,57 @@ self.assertEquals([ c.tag for c in root.getchildren() ], child_tags) + def test_lookup_iter_children(self): + el_class = self._buildElementClass() + el_class.CHILD_TAGS = None + def lookup(doc, el): + if el_class.CHILD_TAGS is None: + el_class.CHILD_TAGS = [ c.tag for c in el ] + return el_class + self._setClassLookup(lookup) + root = self.XML(xml_str) + child_tags = root.CHILD_TAGS + self.assertNotEquals(None, child_tags) + self.assertEquals([ c.tag for c in root.getchildren() ], + child_tags) + + def test_lookup_iterchildren(self): + el_class = self._buildElementClass() + el_class.CHILD_TAGS = None + def lookup(doc, el): + if el_class.CHILD_TAGS is None: + el_class.CHILD_TAGS = [ c.tag for c in el.iterchildren() ] + return el_class + self._setClassLookup(lookup) + root = self.XML(xml_str) + child_tags = root.CHILD_TAGS + self.assertNotEquals(None, child_tags) + self.assertEquals([ c.tag for c in root.getchildren() ], + child_tags) + + def test_lookup_iterchildren_tag(self): + el_class = self._buildElementClass() + el_class.CHILD_TAGS = None + def lookup(doc, el): + if not el_class.CHILD_TAGS: + el_class.CHILD_TAGS = [ + c.tag for c in el.iterchildren(tag='{objectified}c2') ] + return el_class + self._setClassLookup(lookup) + + root = self.XML(xml_str) + child_tags = root.CHILD_TAGS + self.assertNotEquals(None, child_tags) + self.assertEquals([], child_tags) + + c1 = root[0] + child_tags = root.CHILD_TAGS + self.assertNotEquals(None, child_tags) + self.assertNotEquals([], child_tags) + self.assertEquals( + [ c.tag for c 
in root[0].iterchildren(tag='{objectified}c2') ], + child_tags) + def test_lookup_getparent(self): el_class = self._buildElementClass() el_class.PARENT = None Modified: lxml/branch/lxml-2.0/src/lxml/usedoctest.py ============================================================================== --- lxml/branch/lxml-2.0/src/lxml/usedoctest.py (original) +++ lxml/branch/lxml-2.0/src/lxml/usedoctest.py Thu Feb 14 15:52:28 2008 @@ -1,3 +1,13 @@ +"""Doctest module for XML comparison. + +Usage:: + + >>> import lxml.usedoctest + >>> # now do your XML doctests ... + +See `lxml.doctestcompare` +""" + from lxml import doctestcompare doctestcompare.temp_install(del_module=__name__) Modified: lxml/branch/lxml-2.0/src/lxml/xinclude.pxi ============================================================================== --- lxml/branch/lxml-2.0/src/lxml/xinclude.pxi (original) +++ lxml/branch/lxml-2.0/src/lxml/xinclude.pxi Thu Feb 14 15:52:28 2008 @@ -8,7 +8,8 @@ pass cdef class XInclude: - """XInclude processor. + """XInclude(self) + XInclude processor. Create an instance and call it on an Element to run XInclude processing. @@ -22,6 +23,7 @@ return self._error_log.copy() def __call__(self, _Element node not None): + "__call__(self, node)" # We cannot pass the XML_PARSE_NOXINCNODE option as this would free # the XInclude nodes - there may still be Python references to them! # Therefore, we allow XInclude nodes to be converted to Modified: lxml/branch/lxml-2.0/src/lxml/xmlerror.pxi ============================================================================== --- lxml/branch/lxml-2.0/src/lxml/xmlerror.pxi (original) +++ lxml/branch/lxml-2.0/src/lxml/xmlerror.pxi Thu Feb 14 15:52:28 2008 @@ -5,7 +5,9 @@ # module level API functions def clear_error_log(): - """Clear the global error log. Note that this log is already bound to a + """clear_error_log() + + Clear the global error log. Note that this log is already bound to a fixed size. 
""" __GLOBAL_ERROR_LOG.clear() @@ -14,7 +16,7 @@ """Clear the global error log. Note that this log is already bound to a fixed size. - @deprecated: use ``clear_error_log()`` instead. + :deprecated: use ``clear_error_log()`` instead. """ __GLOBAL_ERROR_LOG.clear() @@ -233,8 +235,10 @@ return _ListErrorLog(filtered, None, None) def filter_types(self, types): - """Filter the errors by the given types and return a new error log - containing the matches. + """filter_types(self, types) + + Filter the errors by the given types and return a new error + log containing the matches. """ cdef _LogEntry entry if not python.PySequence_Check(types): @@ -246,8 +250,10 @@ return _ListErrorLog(filtered, None, None) def filter_levels(self, levels): - """Filter the errors by the given error levels and return a new error - log containing the matches. + """filter_levels(self, levels) + + Filter the errors by the given error levels and return a new + error log containing the matches. """ cdef _LogEntry entry if not python.PySequence_Check(levels): @@ -259,7 +265,10 @@ return _ListErrorLog(filtered, None, None) def filter_from_level(self, level): - "Return a log with all messages of the requested level of worse." + """filter_from_level(self, level) + + Return a log with all messages of the requested level of worse. + """ cdef _LogEntry entry filtered = [] for entry in self._entries: @@ -268,15 +277,24 @@ return _ListErrorLog(filtered, None, None) def filter_from_fatals(self): - "Convenience method to get all fatal error messages." + """filter_from_fatals(self) + + Convenience method to get all fatal error messages. + """ return self.filter_from_level(ErrorLevels.FATAL) def filter_from_errors(self): - "Convenience method to get all error messages or worse." + """filter_from_errors(self) + + Convenience method to get all error messages or worse. + """ return self.filter_from_level(ErrorLevels.ERROR) def filter_from_warnings(self): - "Convenience method to get all warnings or worse." 
+ """filter_from_warnings(self) + + Convenience method to get all warnings or worse. + """ return self.filter_from_level(ErrorLevels.WARNING) cdef class _ErrorLog(_ListErrorLog): @@ -331,7 +349,8 @@ python.PyList_Append(entries, entry) cdef class PyErrorLog(_BaseErrorLog): - """A global error log that connects to the Python stdlib logging package. + """PyErrorLog(self, logger_name=None) + A global error log that connects to the Python stdlib logging package. The constructor accepts an optional logger name. @@ -395,12 +414,14 @@ Note that this disables access to the global error log from exceptions. Parsers, XSLT etc. will continue to provide their normal local error log. - @deprecated: use ``use_global_python_log()`` instead. + :deprecated: use ``use_global_python_log()`` instead. """ use_global_python_log(log) def use_global_python_log(PyErrorLog log not None): - """Replace the global error log by an etree.PyErrorLog that uses the + """use_global_python_log(log) + + Replace the global error log by an etree.PyErrorLog that uses the standard Python logging package. Note that this disables access to the global error log from exceptions. Modified: lxml/branch/lxml-2.0/src/lxml/xmlid.pxi ============================================================================== --- lxml/branch/lxml-2.0/src/lxml/xmlid.pxi (original) +++ lxml/branch/lxml-2.0/src/lxml/xmlid.pxi Thu Feb 14 15:52:28 2008 @@ -1,7 +1,9 @@ cdef object _find_id_attributes def XMLID(text): - """Parse the text and return a tuple (root node, ID dictionary). The root + """XMLID(text) + + Parse the text and return a tuple (root node, ID dictionary). The root node is the same as returned by the XML() function. The dictionary contains string-element pairs. The dictionary keys are the values of 'id' attributes. The elements referenced by the ID are stored as dictionary @@ -19,7 +21,9 @@ return (root, dic) def XMLDTDID(text): - """Parse the text and return a tuple (root node, ID dictionary). 
The root + """XMLDTDID(text) + + Parse the text and return a tuple (root node, ID dictionary). The root node is the same as returned by the XML() function. The dictionary contains string-element pairs. The dictionary keys are the values of ID attributes as defined by the DTD. The elements referenced by the ID are @@ -37,7 +41,9 @@ return (root, _IDDict(root)) def parseid(source, parser=None): - """Parses the source into a tuple containing an ElementTree object and an + """parseid(source, parser=None) + + Parses the source into a tuple containing an ElementTree object and an ID dictionary. If no parser is provided as second argument, the default parser is used. @@ -49,7 +55,8 @@ return (_elementTreeFactory(doc, None), _IDDict(doc)) cdef class _IDDict: - """A dictionary-like proxy class that mapps ID attributes to elements. + """IDDict(self, etree) + A dictionary-like proxy class that mapps ID attributes to elements. The dictionary must be instantiated with the root element of a parsed XML document, otherwise the behaviour is undefined. Elements and XML trees Modified: lxml/branch/lxml-2.0/src/lxml/xmlschema.pxi ============================================================================== --- lxml/branch/lxml-2.0/src/lxml/xmlschema.pxi (original) +++ lxml/branch/lxml-2.0/src/lxml/xmlschema.pxi Thu Feb 14 15:52:28 2008 @@ -20,7 +20,8 @@ # XMLSchema cdef class XMLSchema(_Validator): - """Turn a document into an XML Schema validator. + """XMLSchema(self, etree=None, file=None) + Turn a document into an XML Schema validator. Either pass a schema as Element or ElementTree, or pass a file or filename through the ``file`` keyword argument. @@ -83,7 +84,9 @@ xmlschema.xmlSchemaFree(self._c_schema) def __call__(self, etree): - """Validate doc using XML Schema. + """__call__(self, etree) + + Validate doc using XML Schema. Returns true if document is valid, false if not. 
""" Modified: lxml/branch/lxml-2.0/src/lxml/xpath.pxi ============================================================================== --- lxml/branch/lxml-2.0/src/lxml/xpath.pxi (original) +++ lxml/branch/lxml-2.0/src/lxml/xpath.pxi Thu Feb 14 15:52:28 2008 @@ -127,13 +127,17 @@ self._context.set_context(xpathCtxt) def evaluate(self, _eval_arg, **_variables): - """Evaluate an XPath expression. + """evaluate(self, _eval_arg, **_variables) + + Evaluate an XPath expression. Instead of calling this method, you can also call the evaluator object itself. Variables may be provided as keyword arguments. Note that namespaces are currently not supported for variables. + + :deprecated: call the object, not its method. """ return self(_eval_arg, **_variables) @@ -207,7 +211,8 @@ cdef class XPathElementEvaluator(_XPathEvaluatorBase): - """Create an XPath evaluator for an element. + """XPathElementEvaluator(self, element, namespaces=None, extensions=None, regexp=True) + Create an XPath evaluator for an element. Absolute XPath expressions (starting with '/') will be evaluated against the ElementTree as returned by getroottree(). @@ -232,17 +237,34 @@ def registerNamespace(self, prefix, uri): """Register a namespace with the XPath context. + + :deprecated: use ``register_namespace()`` instead + """ + self._context.addNamespace(prefix, uri) + + def register_namespace(self, prefix, uri): + """Register a namespace with the XPath context. """ self._context.addNamespace(prefix, uri) def registerNamespaces(self, namespaces): """Register a prefix -> uri dict. + + :deprecated: use ``register_namespaces()`` instead + """ + for prefix, uri in namespaces.items(): + self._context.addNamespace(prefix, uri) + + def register_namespaces(self, namespaces): + """Register a prefix -> uri dict. """ for prefix, uri in namespaces.items(): self._context.addNamespace(prefix, uri) def __call__(self, _path, **_variables): - """Evaluate an XPath expression on the document. 
+ """__call__(self, _path, **_variables) + + Evaluate an XPath expression on the document. Variables may be provided as keyword arguments. Note that namespaces are currently not supported for variables. @@ -276,7 +298,8 @@ cdef class XPathDocumentEvaluator(XPathElementEvaluator): - """Create an XPath evaluator for an ElementTree. + """XPathDocumentEvaluator(self, etree, namespaces=None, extensions=None, regexp=True) + Create an XPath evaluator for an ElementTree. Additional namespace declarations can be passed with the 'namespace' keyword argument. EXSLT regular expression support can be disabled with @@ -289,7 +312,9 @@ extensions=extensions, regexp=regexp) def __call__(self, _path, **_variables): - """Evaluate an XPath expression on the document. + """__call__(self, _path, **_variables) + + Evaluate an XPath expression on the document. Variables may be provided as keyword arguments. Note that namespaces are currently not supported for variables. @@ -327,7 +352,9 @@ def XPathEvaluator(etree_or_element, *, namespaces=None, extensions=None, regexp=True): - """Creates an XPath evaluator for an ElementTree or an Element. + """XPathEvaluator(etree_or_element, namespaces=None, extensions=None, regexp=True) + + Creates an XPath evaluator for an ElementTree or an Element. The resulting object can be called with an XPath expression as argument and XPath variables provided as keyword arguments. @@ -347,8 +374,8 @@ cdef class XPath(_XPathEvaluatorBase): - """A compiled XPath expression that can be called on Elements and - ElementTrees. + """XPath(self, path, namespaces=None, extensions=None, regexp=True) + A compiled XPath expression that can be called on Elements and ElementTrees. 
Besides the XPath expression, you can pass prefix-namespace mappings and extension functions to the constructor through the keyword arguments @@ -374,6 +401,7 @@ self._raise_parse_error() def __call__(self, _etree_or_element, **_variables): + "__call__(self, _etree_or_element, **_variables)" cdef xpath.xmlXPathObject* xpathObj cdef _Document document cdef _Element element @@ -414,8 +442,8 @@ _find_namespaces = re.compile('({[^}]+})').findall cdef class ETXPath(XPath): - """Special XPath class that supports the ElementTree {uri} notation for - namespaces. + """ETXPath(self, path, extensions=None, regexp=True) + Special XPath class that supports the ElementTree {uri} notation for namespaces. Note that this class does not accept the ``namespace`` keyword argument. All namespaces must be passed as part of the path string. Modified: lxml/branch/lxml-2.0/src/lxml/xslt.pxi ============================================================================== --- lxml/branch/lxml-2.0/src/lxml/xslt.pxi (original) +++ lxml/branch/lxml-2.0/src/lxml/xslt.pxi Thu Feb 14 15:52:28 2008 @@ -167,16 +167,20 @@ # XSLT file/network access control cdef class XSLTAccessControl: - """Access control for XSLT: reading/writing files, directories and network - I/O. Access to a type of resource is granted or denied by passing any of - the following keyword arguments. All of them default to True to allow - access. - - * read_file - * write_file - * create_dir - * read_network - * write_network + """XSLTAccessControl(self, read_file=True, write_file=True, create_dir=True, read_network=True, write_network=True) + + Access control for XSLT: reading/writing files, directories and + network I/O. Access to a type of resource is granted or denied by + passing any of the following boolean keyword arguments. All of + them default to True to allow access. + + - read_file + - write_file + - create_dir + - read_network + - write_network + + See `XSLT`. 
""" cdef xslt.xsltSecurityPrefs* _prefs def __init__(self, *, read_file=True, write_file=True, create_dir=True, @@ -252,16 +256,28 @@ cdef class XSLT: - """Turn a document into an XSLT object. + """XSLT(self, xslt_input, extensions=None, regexp=True, access_control=None) + + Turn an XSL document into an XSLT object. + + Calling this object on a tree or Element will execute the XSLT:: + + >>> transform = etree.XSLT(xsl_tree) + >>> result = transform(xml_tree) Keyword arguments of the constructor: - * regexp - enable exslt regular expression support in XPath (default: True) - * access_control - access restrictions for network or file system - Keyword arguments of the XSLT run: - * profile_run - enable XSLT profiling + - regexp: enable exslt regular expression support in XPath + (default: True) + - access_control: access restrictions for network or file + system (see `XSLTAccessControl`) + + Keyword arguments of the XSLT call: - Other keyword arguments are passed to the stylesheet. + - profile_run: enable XSLT profiling (default: False) + + Other keyword arguments of the call are passed to the stylesheet + as parameters. """ cdef _XSLTContext _context cdef xslt.xsltStylesheet* _c_style @@ -328,14 +344,22 @@ xslt.xsltFreeStylesheet(self._c_style) property error_log: + "The log of errors and warnings of an XSLT execution." def __get__(self): return self._error_log.copy() def apply(self, _input, *, profile_run=False, **_kw): + """apply(self, _input, profile_run=False, **_kw) + + :deprecated: call the object, not this method.""" return self(_input, profile_run=profile_run, **_kw) def tostring(self, _ElementTree result_tree): - """Save result doc to string based on stylesheet output method. + """tostring(self, result_tree) + + Save result doc to string based on stylesheet output method. + + :deprecated: use str(result_tree) instead. 
""" return str(result_tree) @@ -346,6 +370,14 @@ return _copyXSLT(self) def __call__(self, _input, *, profile_run=False, **_kw): + """__call__(self, _input, profile_run=False, **_kw) + + Execute the XSL transformation on a tree or Element. + + Pass the ``profile_run`` option to get profile information + about the XSLT. The result of the XSLT will have a property + xslt_profile that holds an XML tree with profiling data. + """ cdef _XSLTContext context cdef _XSLTResolverContext resolver_context cdef _Document input_doc Modified: lxml/branch/lxml-2.0/version.txt ============================================================================== --- lxml/branch/lxml-2.0/version.txt (original) +++ lxml/branch/lxml-2.0/version.txt Thu Feb 14 15:52:28 2008 @@ -1 +1 @@ -2.0 +2.0.1 From scoder at codespeak.net Thu Feb 14 21:11:59 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Thu, 14 Feb 2008 21:11:59 +0100 (CET) Subject: [Lxml-checkins] r51494 - in lxml/trunk: . doc Message-ID: <20080214201159.A687A16844B@codespeak.net> Author: scoder Date: Thu Feb 14 21:11:55 2008 New Revision: 51494 Modified: lxml/trunk/ (props changed) lxml/trunk/doc/objectify.txt Log: r3509 at delle: sbehnel | 2008-02-14 21:02:45 +0100 doc cleanup Modified: lxml/trunk/doc/objectify.txt ============================================================================== --- lxml/trunk/doc/objectify.txt (original) +++ lxml/trunk/doc/objectify.txt Thu Feb 14 21:11:55 2008 @@ -16,31 +16,6 @@ used. Python data types are extracted from XML content automatically and made available to the normal Python operators. -To set up and use ``objectify``, you need both the ``lxml.etree`` module and -``lxml.objectify``:: - - >>> from lxml import etree - >>> from lxml import objectify - -The objectify API is very different from the ElementTree API. If it -is used, it should not be mixed with other element implementations -(such as trees parsed with ``lxml.etree``), to avoid non-obvious -behaviour. 
- -The `benchmark page`_ has some hints on performance optimisation of code using -lxml.objectify. - -To make the doctests in this document look a little nicer, we also use this: - - >>> import lxml.usedoctest - -Imported from within a doctest, this relieves us from caring about the exact -formatting of XML output. - -.. _Amara: http://uche.ogbuji.net/tech/4suite/amara/ -.. _gnosis.xml.objectify: http://gnosis.cx/download/ -.. _`benchmark page`: performance.html#lxml-objectify - .. contents:: .. 1 The lxml.objectify API @@ -61,6 +36,33 @@ 5.5 Advanced element class lookup 6 What is different from lxml.etree? +.. _Amara: http://uche.ogbuji.net/tech/4suite/amara/ +.. _gnosis.xml.objectify: http://gnosis.cx/download/ +.. _`benchmark page`: performance.html#lxml-objectify + + +To set up and use ``objectify``, you need both the ``lxml.etree`` +module and ``lxml.objectify``:: + + >>> from lxml import etree + >>> from lxml import objectify + +The objectify API is very different from the ElementTree API. If it +is used, it should not be mixed with other element implementations +(such as trees parsed with ``lxml.etree``), to avoid non-obvious +behaviour. + +The `benchmark page`_ has some hints on performance optimisation of +code using lxml.objectify. + +To make the doctests in this document look a little nicer, we also use +this:: + + >>> import lxml.usedoctest + +Imported from within a doctest, this relieves us from caring about the exact +formatting of XML output. + The lxml.objectify API ====================== From scoder at codespeak.net Thu Feb 14 21:12:02 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Thu, 14 Feb 2008 21:12:02 +0100 (CET) Subject: [Lxml-checkins] r51495 - in lxml/trunk: . 
src/lxml Message-ID: <20080214201202.2F5BE16844B@codespeak.net> Author: scoder Date: Thu Feb 14 21:12:01 2008 New Revision: 51495 Modified: lxml/trunk/ (props changed) lxml/trunk/src/lxml/lxml.objectify.pyx Log: r3510 at delle: sbehnel | 2008-02-14 21:04:10 +0100 cleanup of ObjectifiedElement.__delitem__() Modified: lxml/trunk/src/lxml/lxml.objectify.pyx ============================================================================== --- lxml/trunk/src/lxml/lxml.objectify.pyx (original) +++ lxml/trunk/src/lxml/lxml.objectify.pyx Thu Feb 14 21:12:01 2008 @@ -317,26 +317,17 @@ def __delitem__(self, key): cdef Py_ssize_t start, stop, step, slicelength + parent = self.getparent() + if parent is None: + raise TypeError("deleting items not supported by root element") if python.PySlice_Check(key): # slice deletion - python.PySlice_GetIndicesEx( - key, _countSiblings(self._c_node), - &start, &stop, &step, &slicelength) - parent = self.getparent() - if parent is None: - raise TypeError("deleting slices of root element not supported") - if step < 0: - del_items = list(self)[start:stop:step] - else: - del_items = list(islice(self, start, stop, step)) + del_items = list(self)[key] remove = parent.remove for el in del_items: remove(el) else: # normal index deletion - parent = self.getparent() - if parent is None: - raise TypeError("deleting items not supported by root element") sibling = self.__getitem__(key) parent.remove(sibling) From scoder at codespeak.net Thu Feb 14 21:12:06 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Thu, 14 Feb 2008 21:12:06 +0100 (CET) Subject: [Lxml-checkins] r51496 - lxml/trunk Message-ID: <20080214201206.74C9016844E@codespeak.net> Author: scoder Date: Thu Feb 14 21:12:05 2008 New Revision: 51496 Modified: lxml/trunk/ (props changed) lxml/trunk/CHANGES.txt Log: r3511 at delle: sbehnel | 2008-02-14 21:10:41 +0100 changelog Modified: lxml/trunk/CHANGES.txt 
============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Thu Feb 14 21:12:05 2008 @@ -2,6 +2,21 @@ lxml changelog ============== +2.0.2 (Under development) +========================= + +Features added +-------------- + +Bugs fixed +---------- + +* Slice deletion bug fixed in objectify. + +Other changes +------------- + + 2.0.1 (2008-02-13) ================== From scoder at codespeak.net Fri Feb 15 10:22:28 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 15 Feb 2008 10:22:28 +0100 (CET) Subject: [Lxml-checkins] r51508 - in lxml/trunk: . doc Message-ID: <20080215092228.D4B1016843E@codespeak.net> Author: scoder Date: Fri Feb 15 10:22:28 2008 New Revision: 51508 Modified: lxml/trunk/ (props changed) lxml/trunk/doc/build.txt lxml/trunk/doc/pyrex.txt Log: r3515 at delle: sbehnel | 2008-02-15 08:27:28 +0100 doc fixes Modified: lxml/trunk/doc/build.txt ============================================================================== --- lxml/trunk/doc/build.txt (original) +++ lxml/trunk/doc/build.txt Fri Feb 15 10:22:28 2008 @@ -44,10 +44,10 @@ want to be an lxml developer, then you do need a working Cython installation. You can use EasyInstall_ to install it:: - easy_install Cython==0.9.6.11b + easy_install Cython==0.9.6.12 -lxml currently requires Cython 0.9.6.11b, later versions were not -tested. +lxml currently requires Cython 0.9.6.11b or 0.9.6.12, later versions +were not tested. Subversion Modified: lxml/trunk/doc/pyrex.txt ============================================================================== --- lxml/trunk/doc/pyrex.txt (original) +++ lxml/trunk/doc/pyrex.txt Fri Feb 15 10:22:28 2008 @@ -22,4 +22,4 @@ clear description of what you did to run into the problems and provide the compiler output that shows the error. -.. Pyrex_: http://www.cosc.canterbury.ac.nz/~greg/python/Pyrex/ +.. 
_Pyrex: http://www.cosc.canterbury.ac.nz/~greg/python/Pyrex/ From scoder at codespeak.net Fri Feb 15 10:22:33 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 15 Feb 2008 10:22:33 +0100 (CET) Subject: [Lxml-checkins] r51509 - in lxml/trunk: . doc Message-ID: <20080215092233.4A754168441@codespeak.net> Author: scoder Date: Fri Feb 15 10:22:32 2008 New Revision: 51509 Added: lxml/trunk/doc/lxml-source-howto.txt Modified: lxml/trunk/ (props changed) lxml/trunk/doc/mkhtml.py Log: r3516 at delle: sbehnel | 2008-02-15 10:21:52 +0100 initial document: starting to work on the source code Added: lxml/trunk/doc/lxml-source-howto.txt ============================================================================== --- (empty file) +++ lxml/trunk/doc/lxml-source-howto.txt Fri Feb 15 10:22:32 2008 @@ -0,0 +1,174 @@ +============================== +How to read the source of lxml +============================== + +:Author: + Stefan Behnel + +.. meta:: + :description: How to read and work on the source code of lxml + :keywords: lxml, XML, Cython, source code, develop, comprehend, understand + +This document describes how to read the source code of lxml_ and how +to start working on it. You might also be interested in the companion +document that describes `how to build lxml from sources`_. + +.. _lxml: http://codespeak.net/lxml +.. _`how to build lxml from sources`: build.html + +.. contents:: +.. + + +What is Cython? +=============== + +.. _Cython: http://cython.org/ +.. _Pyrex: http://www.cosc.canterbury.ac.nz/~greg/python/Pyrex/ + +Cython_ is the language that lxml is written in. It is a very +Python-like language that was specifically designed for writing Python +extension modules. The language is so close to Python that the Cython +compiler can actually compile many, many Python programs to C without +major modifications. 
But the real speed gains of a C compilation come +from type annotations that were added to the language and that allow +Cython to generate very efficient C code. + +The reason why Cython (or actually its predecessor Pyrex_ at the time) +was chosen as an implementation language for lxml, is that it makes it +very easy to interface with both the Python world and external C code. +Cython generates all the necessary glue code for the Python API, +including Python types and reference counting for Python objects. +Calling into C code is no more than declaring the signature of the +function and maybe some variables as being C types, pointers or +structs, and then calling it. The rest of the code is just plain +Python code. + + +Where to start? +=============== + +First of all, read `how to build lxml from sources`_ to learn how to +retrieve the source code from the Subversion repository and how to set +up a build environment. The source code lives in the subdirectory +``src`` of the checkout. The documentation (which is written in the +`ReStructured Text`_ format) lives in the ``doc`` directory. + +.. _`ReStructured Text`: + +The main extension modules in lxml are ``lxml.etree`` and +``lxml.objectify``. All main modules have the file extension +``.pyx``, which shows the descendence from Pyrex. As usual in Python, +the main files start with a short description and a couple of imports. +Cython distinguishes between the run-time ``import`` statement (as +known from Python) and the compile-time ``cimport`` statement, which +imports C declarations, either from external libraries or from other +Cython modules. + + +Concepts +-------- + +* proxies +* naming conventions +* + + +lxml.etree +---------- + +The main module, ``lxml.etree``, is in the file ``lxml.etree.pyx``. +It implements the main functions and types of the ElementTree API, and +is therefore the best place to start if you want to find out how a +specific feature is implemented.
+ +At the very end of the file, it contains a series of ``include`` +statements that merge the rest of the implementation into the +generated C code. Yes, you read right: no importing, no source file +namespacing, just plain good old include and a huge C code result that +we throw right into the C compiler. + +The main files are: + +proxy.pxi: + + Very low-level functions for memory allocation/deallocation + and Element proxy handling. Ignoring this for the beginning + will keep your head from exploding. + +apihelpers.pxi: + + Private C helper functions. Most of the little functions that are + used all over the place are defined here. This includes things + like reading out the text content of a libxml2 tree node, checking + input from the API level, creating a new Element node or handling + attribute values. If you want to work on the lxml code, you + should keep these functions in the back of your head, as they will + definitely make your life easier. + +xmlerror.pxi: + + Error log handling. All error messages that libxml2 generates + internally walk through the code in this file to end up in lxml's + Python level error logs. + + At the end of the file, you will find a long list of named error + codes. It is generated from the libxml2 HTML documentation (using + lxml, of course). See the script ``update-error-constants.py`` + for this. + +classlookup.pxi: + + Element class lookup mechanisms. The main API and engines for + those who want to define custom Element classes and inject them + into lxml. + +nsclasses.pxi: + + Namespace implementation and registry. The registry and engine + for Element classes that use the ElementNamespaceClassLookup + scheme. + +docloader.pxi: + + Support for custom document loaders. Base class and registry for + custom document resolvers. + +parser.pxi: + + Parsers for XML and HTML. This is the main parser engine. It's + the reason why you can parse a document from various sources in + two lines of Python code. 
It's definitely not the right place to + start reading lxml's source code. + +parsertarget.pxi: + + ET Parser target. + +serializer.pxi: + + XML output functions + +iterparse.pxi: + + incremental XML parsing + +xmlid.pxi: + + XMLID and IDDict + +xinclude.pxi: + + XInclude + +extensions.pxi: + + XPath/XSLT extension functions + +xpath.pxi: + + XPath evaluation + +xslt.pxi: + + XSL transformations Modified: lxml/trunk/doc/mkhtml.py ============================================================================== --- lxml/trunk/doc/mkhtml.py (original) +++ lxml/trunk/doc/mkhtml.py Fri Feb 15 10:22:32 2008 @@ -3,13 +3,14 @@ SITE_STRUCTURE = [ ('lxml', ('main.txt', 'intro.txt', 'lxml2.txt', 'FAQ.txt', - 'compatibility.txt', 'performance.txt', 'build.txt')), + 'compatibility.txt', 'performance.txt')), ('Developing with lxml', ('tutorial.txt', 'api.txt', 'parsing.txt', 'validation.txt', 'xpathxslt.txt', 'objectify.txt', 'lxmlhtml.txt', 'cssselect.txt', 'elementsoup.txt')), ('Extending lxml', ('resolvers.txt', 'extensions.txt', 'element_classes.txt', 'sax.txt', 'capi.txt')), + ('Developing lxml', ('build.txt', 'lxml-source-howto.txt')), ] RST2HTML_OPTIONS = " ".join([ From scoder at codespeak.net Fri Feb 15 10:35:51 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 15 Feb 2008 10:35:51 +0100 (CET) Subject: [Lxml-checkins] r51510 - in lxml/trunk: .
doc Message-ID: <20080215093551.0F48B168471@codespeak.net> Author: scoder Date: Fri Feb 15 10:35:49 2008 New Revision: 51510 Modified: lxml/trunk/ (props changed) lxml/trunk/doc/lxml-source-howto.txt Log: r3519 at delle: sbehnel | 2008-02-15 10:35:15 +0100 doc structure Modified: lxml/trunk/doc/lxml-source-howto.txt ============================================================================== --- lxml/trunk/doc/lxml-source-howto.txt (original) +++ lxml/trunk/doc/lxml-source-howto.txt Fri Feb 15 10:35:49 2008 @@ -67,7 +67,7 @@ Concepts --------- +======== * proxies * naming conventions @@ -75,7 +75,7 @@ lxml.etree ----------- +========== The main module, ``lxml.etree``, is in the file ``lxml.etree.pyx``. It implements the main functions and types of the ElementTree API, and From scoder at codespeak.net Fri Feb 15 10:39:38 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 15 Feb 2008 10:39:38 +0100 (CET) Subject: [Lxml-checkins] r51512 - in lxml/trunk: . doc Message-ID: <20080215093938.407D0168471@codespeak.net> Author: scoder Date: Fri Feb 15 10:39:37 2008 New Revision: 51512 Modified: lxml/trunk/ (props changed) lxml/trunk/doc/lxml-source-howto.txt Log: r3521 at delle: sbehnel | 2008-02-15 10:39:01 +0100 doc section on documentation Modified: lxml/trunk/doc/lxml-source-howto.txt ============================================================================== --- lxml/trunk/doc/lxml-source-howto.txt (original) +++ lxml/trunk/doc/lxml-source-howto.txt Fri Feb 15 10:39:37 2008 @@ -15,6 +15,7 @@ .. _lxml: http://codespeak.net/lxml .. _`how to build lxml from sources`: build.html +.. _`ReStructured Text`: http://docutils.sourceforge.net/rst.html .. contents:: .. @@ -54,8 +55,6 @@ ``src`` of the checkout. The documentation (which is written in the `ReStructured Text`_ format) lives in the ``doc`` directory. -.. _`ReStructured Text`: - The main extension modules in lxml are ``lxml.etree`` and ``lxml.objectify``. 
All main modules have the file extension ``.pyx``, which shows the descendence from Pyrex. As usual in Python, @@ -71,7 +70,16 @@ * proxies * naming conventions -* +* ... + + +The documentation +================= + +* docs in ``doc`` directory +* `ReStructured Text`_ format +* generated through ``mkhtml.py`` script +* ... lxml.etree From lxml-checkins at codespeak.net Fri Feb 15 15:14:05 2008 From: lxml-checkins at codespeak.net (lxml-checkins at codespeak.net) Date: Fri, 15 Feb 2008 15:14:05 +0100 (CET) Subject: [Lxml-checkins] February 76% OFF Message-ID: <20080215161343.23936.qmail@pmsn.179.79.124.92.sable.dsl.krasnet.ru> An HTML attachment was scrubbed... URL: http://codespeak.net/pipermail/lxml-checkins/attachments/20080215/c0d4b6b5/attachment.htm From scoder at codespeak.net Fri Feb 15 15:15:14 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 15 Feb 2008 15:15:14 +0100 (CET) Subject: [Lxml-checkins] r51537 - in lxml/trunk: . src/lxml Message-ID: <20080215141514.ED4B416847F@codespeak.net> Author: scoder Date: Fri Feb 15 15:15:14 2008 New Revision: 51537 Modified: lxml/trunk/ (props changed) lxml/trunk/src/lxml/xmlerror.pxd lxml/trunk/src/lxml/xmlerror.pxi lxml/trunk/update-error-constants.py Log: r3524 at delle: sbehnel | 2008-02-15 13:39:53 +0100 integrate all error type enums: parser, XPath, schema, relaxng Modified: lxml/trunk/src/lxml/xmlerror.pxd ============================================================================== --- lxml/trunk/src/lxml/xmlerror.pxd (original) +++ lxml/trunk/src/lxml/xmlerror.pxd Fri Feb 15 15:15:14 2008 @@ -767,6 +767,101 @@ XML_I18N_NO_OUTPUT = 6004 # 6004 XML_CHECK_ = 6005 # 5033 XML_CHECK_X = 6006 # 503 + + ctypedef enum xmlXPathError: + XPATH_EXPRESSION_OK = 0 + XPATH_NUMBER_ERROR = 1 + XPATH_UNFINISHED_LITERAL_ERROR = 2 + XPATH_START_LITERAL_ERROR = 3 + XPATH_VARIABLE_REF_ERROR = 4 + XPATH_UNDEF_VARIABLE_ERROR = 5 + XPATH_INVALID_PREDICATE_ERROR = 6 + XPATH_EXPR_ERROR = 7 + XPATH_UNCLOSED_ERROR = 8 
+ XPATH_UNKNOWN_FUNC_ERROR = 9 + XPATH_INVALID_OPERAND = 10 + XPATH_INVALID_TYPE = 11 + XPATH_INVALID_ARITY = 12 + XPATH_INVALID_CTXT_SIZE = 13 + XPATH_INVALID_CTXT_POSITION = 14 + XPATH_MEMORY_ERROR = 15 + XPTR_SYNTAX_ERROR = 16 + XPTR_RESOURCE_ERROR = 17 + XPTR_SUB_RESOURCE_ERROR = 18 + XPATH_UNDEF_PREFIX_ERROR = 19 + XPATH_ENCODING_ERROR = 20 + XPATH_INVALID_CHAR_ERROR = 21 + XPATH_INVALID_CTXT = 22 + + ctypedef enum xmlSchemaValidError: + XML_SCHEMAS_ERR_OK = 0 + XML_SCHEMAS_ERR_NOROOT = 1 + XML_SCHEMAS_ERR_UNDECLAREDELEM = 2 + XML_SCHEMAS_ERR_NOTTOPLEVEL = 3 + XML_SCHEMAS_ERR_MISSING = 4 + XML_SCHEMAS_ERR_WRONGELEM = 5 + XML_SCHEMAS_ERR_NOTYPE = 6 + XML_SCHEMAS_ERR_NOROLLBACK = 7 + XML_SCHEMAS_ERR_ISABSTRACT = 8 + XML_SCHEMAS_ERR_NOTEMPTY = 9 + XML_SCHEMAS_ERR_ELEMCONT = 10 + XML_SCHEMAS_ERR_HAVEDEFAULT = 11 + XML_SCHEMAS_ERR_NOTNILLABLE = 12 + XML_SCHEMAS_ERR_EXTRACONTENT = 13 + XML_SCHEMAS_ERR_INVALIDATTR = 14 + XML_SCHEMAS_ERR_INVALIDELEM = 15 + XML_SCHEMAS_ERR_NOTDETERMINIST = 16 + XML_SCHEMAS_ERR_CONSTRUCT = 17 + XML_SCHEMAS_ERR_INTERNAL = 18 + XML_SCHEMAS_ERR_NOTSIMPLE = 19 + XML_SCHEMAS_ERR_ATTRUNKNOWN = 20 + XML_SCHEMAS_ERR_ATTRINVALID = 21 + XML_SCHEMAS_ERR_VALUE = 22 + XML_SCHEMAS_ERR_FACET = 23 + XML_SCHEMAS_ERR_ = 24 + XML_SCHEMAS_ERR_XXX = 25 + + ctypedef enum xmlRelaxNGValidErr: + XML_RELAXNG_OK = 0 + XML_RELAXNG_ERR_MEMORY = 1 + XML_RELAXNG_ERR_TYPE = 2 + XML_RELAXNG_ERR_TYPEVAL = 3 + XML_RELAXNG_ERR_DUPID = 4 + XML_RELAXNG_ERR_TYPECMP = 5 + XML_RELAXNG_ERR_NOSTATE = 6 + XML_RELAXNG_ERR_NODEFINE = 7 + XML_RELAXNG_ERR_LISTEXTRA = 8 + XML_RELAXNG_ERR_LISTEMPTY = 9 + XML_RELAXNG_ERR_INTERNODATA = 10 + XML_RELAXNG_ERR_INTERSEQ = 11 + XML_RELAXNG_ERR_INTEREXTRA = 12 + XML_RELAXNG_ERR_ELEMNAME = 13 + XML_RELAXNG_ERR_ATTRNAME = 14 + XML_RELAXNG_ERR_ELEMNONS = 15 + XML_RELAXNG_ERR_ATTRNONS = 16 + XML_RELAXNG_ERR_ELEMWRONGNS = 17 + XML_RELAXNG_ERR_ATTRWRONGNS = 18 + XML_RELAXNG_ERR_ELEMEXTRANS = 19 + XML_RELAXNG_ERR_ATTREXTRANS = 20 + 
XML_RELAXNG_ERR_ELEMNOTEMPTY = 21 + XML_RELAXNG_ERR_NOELEM = 22 + XML_RELAXNG_ERR_NOTELEM = 23 + XML_RELAXNG_ERR_ATTRVALID = 24 + XML_RELAXNG_ERR_CONTENTVALID = 25 + XML_RELAXNG_ERR_EXTRACONTENT = 26 + XML_RELAXNG_ERR_INVALIDATTR = 27 + XML_RELAXNG_ERR_DATAELEM = 28 + XML_RELAXNG_ERR_VALELEM = 29 + XML_RELAXNG_ERR_LISTELEM = 30 + XML_RELAXNG_ERR_DATATYPE = 31 + XML_RELAXNG_ERR_VALUE = 32 + XML_RELAXNG_ERR_LIST = 33 + XML_RELAXNG_ERR_NOGRAMMAR = 34 + XML_RELAXNG_ERR_EXTRADATA = 35 + XML_RELAXNG_ERR_LACKDATA = 36 + XML_RELAXNG_ERR_INTERNAL = 37 + XML_RELAXNG_ERR_ELEMWRONG = 38 + XML_RELAXNG_ERR_TEXTWRONG = 39 # --- END: GENERATED CONSTANTS --- cdef extern from "libxml/xmlerror.h": Modified: lxml/trunk/src/lxml/xmlerror.pxi ============================================================================== --- lxml/trunk/src/lxml/xmlerror.pxi (original) +++ lxml/trunk/src/lxml/xmlerror.pxi Fri Feb 15 15:15:14 2008 @@ -523,9 +523,12 @@ cdef void __initErrorConstants(): "Called at setup time to parse the constants and build the classes below." find_constants = re.compile(r"\s*([a-zA-Z0-9_]+)\s*=\s*([0-9]+)").findall - const_defs = ((ErrorLevels, __ERROR_LEVELS), - (ErrorDomains, __ERROR_DOMAINS), - (ErrorTypes, __ERROR_TYPES)) + const_defs = ((ErrorLevels, __ERROR_LEVELS), + (ErrorDomains, __ERROR_DOMAINS), + (ErrorTypes, __PARSER_ERROR_TYPES), + (XPathErrorTypes, __XPATH_ERROR_TYPES), + (XMLSchemaErrorTypes, __XMLSCHEMA_ERROR_TYPES), + (RelaxNGErrorTypes, __RELAXNG_ERROR_TYPES)) for cls, constant_tuple in const_defs: reverse_dict = {} cls._names = reverse_dict @@ -546,6 +549,15 @@ class ErrorTypes: "Libxml2 error types" +class XPathErrorTypes: + "Libxml2 XPath error types" + +class XMLSchemaErrorTypes: + "Libxml2 XML Schema error types" + +class RelaxNGErrorTypes: + "Libxml2 RelaxNG error types" + # --- BEGIN: GENERATED CONSTANTS --- # This section is generated by the script 'update-error-constants.py'. 
@@ -596,8 +608,8 @@ I18N=27 """,) -cdef object __ERROR_TYPES -__ERROR_TYPES = ("""\ +cdef object __PARSER_ERROR_TYPES +__PARSER_ERROR_TYPES = ("""\ ERR_OK=0 ERR_INTERNAL_ERROR=1 ERR_NO_MEMORY=2 @@ -1343,6 +1355,107 @@ CHECK_=6005 CHECK_X=6006 """,) + +cdef object __XPATH_ERROR_TYPES +__XPATH_ERROR_TYPES = ("""\ +XPATH_EXPRESSION_OK=0 +XPATH_NUMBER_ERROR=1 +XPATH_UNFINISHED_LITERAL_ERROR=2 +XPATH_START_LITERAL_ERROR=3 +XPATH_VARIABLE_REF_ERROR=4 +XPATH_UNDEF_VARIABLE_ERROR=5 +XPATH_INVALID_PREDICATE_ERROR=6 +XPATH_EXPR_ERROR=7 +XPATH_UNCLOSED_ERROR=8 +XPATH_UNKNOWN_FUNC_ERROR=9 +XPATH_INVALID_OPERAND=10 +XPATH_INVALID_TYPE=11 +XPATH_INVALID_ARITY=12 +XPATH_INVALID_CTXT_SIZE=13 +XPATH_INVALID_CTXT_POSITION=14 +XPATH_MEMORY_ERROR=15 +XPTR_SYNTAX_ERROR=16 +XPTR_RESOURCE_ERROR=17 +XPTR_SUB_RESOURCE_ERROR=18 +XPATH_UNDEF_PREFIX_ERROR=19 +XPATH_ENCODING_ERROR=20 +XPATH_INVALID_CHAR_ERROR=21 +XPATH_INVALID_CTXT=22 +""",) + +cdef object __XMLSCHEMA_ERROR_TYPES +__XMLSCHEMA_ERROR_TYPES = ("""\ +SCHEMAS_ERR_OK=0 +SCHEMAS_ERR_NOROOT=1 +SCHEMAS_ERR_UNDECLAREDELEM=2 +SCHEMAS_ERR_NOTTOPLEVEL=3 +SCHEMAS_ERR_MISSING=4 +SCHEMAS_ERR_WRONGELEM=5 +SCHEMAS_ERR_NOTYPE=6 +SCHEMAS_ERR_NOROLLBACK=7 +SCHEMAS_ERR_ISABSTRACT=8 +SCHEMAS_ERR_NOTEMPTY=9 +SCHEMAS_ERR_ELEMCONT=10 +SCHEMAS_ERR_HAVEDEFAULT=11 +SCHEMAS_ERR_NOTNILLABLE=12 +SCHEMAS_ERR_EXTRACONTENT=13 +SCHEMAS_ERR_INVALIDATTR=14 +SCHEMAS_ERR_INVALIDELEM=15 +SCHEMAS_ERR_NOTDETERMINIST=16 +SCHEMAS_ERR_CONSTRUCT=17 +SCHEMAS_ERR_INTERNAL=18 +SCHEMAS_ERR_NOTSIMPLE=19 +SCHEMAS_ERR_ATTRUNKNOWN=20 +SCHEMAS_ERR_ATTRINVALID=21 +SCHEMAS_ERR_VALUE=22 +SCHEMAS_ERR_FACET=23 +SCHEMAS_ERR_=24 +SCHEMAS_ERR_XXX=25 +""",) + +cdef object __RELAXNG_ERROR_TYPES +__RELAXNG_ERROR_TYPES = ("""\ +RELAXNG_OK=0 +RELAXNG_ERR_MEMORY=1 +RELAXNG_ERR_TYPE=2 +RELAXNG_ERR_TYPEVAL=3 +RELAXNG_ERR_DUPID=4 +RELAXNG_ERR_TYPECMP=5 +RELAXNG_ERR_NOSTATE=6 +RELAXNG_ERR_NODEFINE=7 +RELAXNG_ERR_LISTEXTRA=8 +RELAXNG_ERR_LISTEMPTY=9 +RELAXNG_ERR_INTERNODATA=10 
+RELAXNG_ERR_INTERSEQ=11 +RELAXNG_ERR_INTEREXTRA=12 +RELAXNG_ERR_ELEMNAME=13 +RELAXNG_ERR_ATTRNAME=14 +RELAXNG_ERR_ELEMNONS=15 +RELAXNG_ERR_ATTRNONS=16 +RELAXNG_ERR_ELEMWRONGNS=17 +RELAXNG_ERR_ATTRWRONGNS=18 +RELAXNG_ERR_ELEMEXTRANS=19 +RELAXNG_ERR_ATTREXTRANS=20 +RELAXNG_ERR_ELEMNOTEMPTY=21 +RELAXNG_ERR_NOELEM=22 +RELAXNG_ERR_NOTELEM=23 +RELAXNG_ERR_ATTRVALID=24 +RELAXNG_ERR_CONTENTVALID=25 +RELAXNG_ERR_EXTRACONTENT=26 +RELAXNG_ERR_INVALIDATTR=27 +RELAXNG_ERR_DATAELEM=28 +RELAXNG_ERR_VALELEM=29 +RELAXNG_ERR_LISTELEM=30 +RELAXNG_ERR_DATATYPE=31 +RELAXNG_ERR_VALUE=32 +RELAXNG_ERR_LIST=33 +RELAXNG_ERR_NOGRAMMAR=34 +RELAXNG_ERR_EXTRADATA=35 +RELAXNG_ERR_LACKDATA=36 +RELAXNG_ERR_INTERNAL=37 +RELAXNG_ERR_ELEMWRONG=38 +RELAXNG_ERR_TEXTWRONG=39 +""",) # --- END: GENERATED CONSTANTS --- __initErrorConstants() Modified: lxml/trunk/update-error-constants.py ============================================================================== --- lxml/trunk/update-error-constants.py (original) +++ lxml/trunk/update-error-constants.py Fri Feb 15 15:15:14 2008 @@ -11,20 +11,24 @@ print sys.argv[0], "/path/to/libxml2-doc-dir" sys.exit(len(sys.argv) > 1) -HTML_FILE = os.path.join(sys.argv[1], 'html', 'libxml-xmlerror.html') -os.stat(HTML_FILE) # raise an error if we can't find it +HTML_DIR = os.path.join(sys.argv[1], 'html') +os.stat(HTML_DIR) # raise an error if we can't find it sys.path.insert(0, 'src') from lxml import etree # map enum name to Python variable name and alignment for constant name ENUM_MAP = { - 'xmlErrorLevel' : ('__ERROR_LEVELS', 'XML_ERR_'), - 'xmlErrorDomain' : ('__ERROR_DOMAINS', 'XML_FROM_'), - 'xmlParserErrors' : ('__ERROR_TYPES', 'XML_') + 'xmlErrorLevel' : ('__ERROR_LEVELS', 'XML_ERR_'), + 'xmlErrorDomain' : ('__ERROR_DOMAINS', 'XML_FROM_'), + 'xmlParserErrors' : ('__PARSER_ERROR_TYPES', 'XML_'), + 'xmlXPathError' : ('__XPATH_ERROR_TYPES', ''), + 'xmlSchemaValidError' : ('__XMLSCHEMA_ERROR_TYPES', 'XML_'), + 'xmlRelaxNGValidErr' : ('__RELAXNG_ERROR_TYPES', 
'XML_'), } -ENUM_ORDER = ('xmlErrorLevel', 'xmlErrorDomain', 'xmlParserErrors') +ENUM_ORDER = ('xmlErrorLevel', 'xmlErrorDomain', 'xmlParserErrors', + 'xmlXPathError', 'xmlSchemaValidError', 'xmlRelaxNGValidErr') COMMENT = """ # This section is generated by the script '%s'. @@ -61,27 +65,40 @@ f.write(''.join(post)) f.close() -def parse_enums(html_file): +def parse_enums(html_dir, html_filename, enum_dict): PARSE_ENUM_NAME = re.compile('\s*enum\s+(\w+)\s*{', re.I).match PARSE_ENUM_VALUE = re.compile('\s*=\s+([0-9]+)\s*(?::\s*(.*))?').match - tree = etree.parse(html_file) + tree = etree.parse(os.path.join(html_dir, html_filename)) xpath = etree.XPathEvaluator( tree, namespaces={'html' : 'http://www.w3.org/1999/xhtml'}) + collect_text = etree.XPath("string()") - enum_dict = {} - enums = xpath.evaluate("//html:pre[@class = 'programlisting' and contains(text(), 'Enum') and html:a[@name]]") + enums = xpath.evaluate("//html:pre[@class = 'programlisting' and contains(text(), 'Enum')]") for enum in enums: - enum_name = PARSE_ENUM_NAME(enum.text).group(1) + enum_name = PARSE_ENUM_NAME(collect_text(enum)) + if not enum_name or enum_name not in ENUM_MAP: + continue + enum_name = enum_name.group(1) print "Found enum", enum_name entries = [] - enum_dict[enum_name] = entries for child in enum: name = child.text - value, descr = PARSE_ENUM_VALUE(child.tail).groups() + match = PARSE_ENUM_VALUE(child.tail) + if not match: + print("Ignoring enum %s (failed to parse field '%s')" % ( + enum_name, name)) + break + value, descr = match.groups() entries.append((name, int(value), descr)) + else: + enum_dict[enum_name] = entries return enum_dict -enum_dict = parse_enums(HTML_FILE) +enum_dict = {} +parse_enums(HTML_DIR, 'libxml-xmlerror.html', enum_dict) +parse_enums(HTML_DIR, 'libxml-xpath.html', enum_dict) +parse_enums(HTML_DIR, 'libxml-xmlschemas.html', enum_dict) +parse_enums(HTML_DIR, 'libxml-relaxng.html', enum_dict) # regenerate source files pxi_result = [] From scoder at 
codespeak.net Fri Feb 15 15:15:15 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 15 Feb 2008 15:15:15 +0100 (CET) Subject: [Lxml-checkins] r51536 - in lxml/trunk: . doc Message-ID: <20080215141515.5EA26168487@codespeak.net> Author: scoder Date: Fri Feb 15 15:15:09 2008 New Revision: 51536 Modified: lxml/trunk/ (props changed) lxml/trunk/doc/lxml-source-howto.txt Log: r3523 at delle: sbehnel | 2008-02-15 13:28:56 +0100 doc structure Modified: lxml/trunk/doc/lxml-source-howto.txt ============================================================================== --- lxml/trunk/doc/lxml-source-howto.txt (original) +++ lxml/trunk/doc/lxml-source-howto.txt Fri Feb 15 15:15:09 2008 @@ -66,7 +66,7 @@ Concepts -======== +-------- * proxies * naming conventions @@ -74,7 +74,7 @@ The documentation -================= +----------------- * docs in ``doc`` directory * `ReStructured Text`_ format From scoder at codespeak.net Fri Feb 15 15:15:18 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 15 Feb 2008 15:15:18 +0100 (CET) Subject: [Lxml-checkins] r51538 - lxml/trunk Message-ID: <20080215141518.4F0261684C1@codespeak.net> Author: scoder Date: Fri Feb 15 15:15:17 2008 New Revision: 51538 Modified: lxml/trunk/ (props changed) lxml/trunk/update-error-constants.py Log: r3525 at delle: sbehnel | 2008-02-15 15:08:18 +0100 error type constants of XPath errors and XML Schema errors are not needed Modified: lxml/trunk/update-error-constants.py ============================================================================== --- lxml/trunk/update-error-constants.py (original) +++ lxml/trunk/update-error-constants.py Fri Feb 15 15:15:17 2008 @@ -22,13 +22,18 @@ 'xmlErrorLevel' : ('__ERROR_LEVELS', 'XML_ERR_'), 'xmlErrorDomain' : ('__ERROR_DOMAINS', 'XML_FROM_'), 'xmlParserErrors' : ('__PARSER_ERROR_TYPES', 'XML_'), - 'xmlXPathError' : ('__XPATH_ERROR_TYPES', ''), - 'xmlSchemaValidError' : ('__XMLSCHEMA_ERROR_TYPES', 'XML_'), +# 'xmlXPathError' : 
('__XPATH_ERROR_TYPES', ''), +# 'xmlSchemaValidError' : ('__XMLSCHEMA_ERROR_TYPES', 'XML_'), 'xmlRelaxNGValidErr' : ('__RELAXNG_ERROR_TYPES', 'XML_'), } -ENUM_ORDER = ('xmlErrorLevel', 'xmlErrorDomain', 'xmlParserErrors', - 'xmlXPathError', 'xmlSchemaValidError', 'xmlRelaxNGValidErr') +ENUM_ORDER = ( + 'xmlErrorLevel', + 'xmlErrorDomain', + 'xmlParserErrors', +# 'xmlXPathError', +# 'xmlSchemaValidError', + 'xmlRelaxNGValidErr') COMMENT = """ # This section is generated by the script '%s'. @@ -65,20 +70,23 @@ f.write(''.join(post)) f.close() +collect_text = etree.XPath("string()") +find_enums = etree.XPath( + "//html:pre[@class = 'programlisting' and contains(text(), 'Enum')]", + namespaces = {'html' : 'http://www.w3.org/1999/xhtml'}) + def parse_enums(html_dir, html_filename, enum_dict): PARSE_ENUM_NAME = re.compile('\s*enum\s+(\w+)\s*{', re.I).match PARSE_ENUM_VALUE = re.compile('\s*=\s+([0-9]+)\s*(?::\s*(.*))?').match tree = etree.parse(os.path.join(html_dir, html_filename)) - xpath = etree.XPathEvaluator( - tree, namespaces={'html' : 'http://www.w3.org/1999/xhtml'}) - collect_text = etree.XPath("string()") - - enums = xpath.evaluate("//html:pre[@class = 'programlisting' and contains(text(), 'Enum')]") + enums = find_enums(tree) for enum in enums: enum_name = PARSE_ENUM_NAME(collect_text(enum)) - if not enum_name or enum_name not in ENUM_MAP: + if not enum_name: continue enum_name = enum_name.group(1) + if enum_name not in ENUM_MAP: + continue print "Found enum", enum_name entries = [] for child in enum: @@ -96,8 +104,8 @@ enum_dict = {} parse_enums(HTML_DIR, 'libxml-xmlerror.html', enum_dict) -parse_enums(HTML_DIR, 'libxml-xpath.html', enum_dict) -parse_enums(HTML_DIR, 'libxml-xmlschemas.html', enum_dict) +#parse_enums(HTML_DIR, 'libxml-xpath.html', enum_dict) +#parse_enums(HTML_DIR, 'libxml-xmlschemas.html', enum_dict) parse_enums(HTML_DIR, 'libxml-relaxng.html', enum_dict) # regenerate source files From scoder at codespeak.net Fri Feb 15 15:15:22 2008 From: 
scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 15 Feb 2008 15:15:22 +0100 (CET) Subject: [Lxml-checkins] r51539 - in lxml/trunk: . doc src/lxml Message-ID: <20080215141522.3397A1684C2@codespeak.net> Author: scoder Date: Fri Feb 15 15:15:21 2008 New Revision: 51539 Modified: lxml/trunk/ (props changed) lxml/trunk/CHANGES.txt lxml/trunk/doc/validation.txt lxml/trunk/src/lxml/xmlerror.pxd lxml/trunk/src/lxml/xmlerror.pxi Log: r3526 at delle: sbehnel | 2008-02-15 15:12:34 +0100 error type constants of XPath errors and XML Schema errors are not needed Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Fri Feb 15 15:15:21 2008 @@ -11,6 +11,8 @@ Bugs fixed ---------- +* Error type names in RelaxNG were reported incorrectly. + * Slice deletion bug fixed in objectify. Other changes Modified: lxml/trunk/doc/validation.txt ============================================================================== --- lxml/trunk/doc/validation.txt (original) +++ lxml/trunk/doc/validation.txt Fri Feb 15 15:15:21 2008 @@ -182,14 +182,23 @@ >>> log = relaxng.error_log >>> print log.last_error - :1:0:ERROR:RELAXNGV:ERR_LT_IN_ATTRIBUTE: Did not expect element c there + :1:0:ERROR:RELAXNGV:RELAXNG_ERR_ELEMWRONG: Did not expect element c there You can see that the error (ERROR) happened during RelaxNG validation -(RELAXNGV). The message then tells you what went wrong. Note that this error -log is local to the RelaxNG object. It will only contain log entries that -appeared during the validation. The DocumentInvalid exception raised by the -``assertValid`` method above provides access to the global error log (like all -other lxml exceptions). +(RELAXNGV). The message then tells you what went wrong. 
You can also +look at the error domain and its type directly:: + + >>> error = log.last_error + >>> print error.domain_name + RELAXNGV + >>> print error.type_name + RELAXNG_ERR_ELEMWRONG + +Note that this error log is local to the RelaxNG object. It will only +contain log entries that appeared during the validation. The +DocumentInvalid exception raised by the ``assertValid`` method above +provides access to the global error log (like all other lxml +exceptions). Similar to XSLT, there's also a less efficient but easier shortcut method to do one-shot RelaxNG validation:: Modified: lxml/trunk/src/lxml/xmlerror.pxd ============================================================================== --- lxml/trunk/src/lxml/xmlerror.pxd (original) +++ lxml/trunk/src/lxml/xmlerror.pxd Fri Feb 15 15:15:21 2008 @@ -768,59 +768,6 @@ XML_CHECK_ = 6005 # 5033 XML_CHECK_X = 6006 # 503 - ctypedef enum xmlXPathError: - XPATH_EXPRESSION_OK = 0 - XPATH_NUMBER_ERROR = 1 - XPATH_UNFINISHED_LITERAL_ERROR = 2 - XPATH_START_LITERAL_ERROR = 3 - XPATH_VARIABLE_REF_ERROR = 4 - XPATH_UNDEF_VARIABLE_ERROR = 5 - XPATH_INVALID_PREDICATE_ERROR = 6 - XPATH_EXPR_ERROR = 7 - XPATH_UNCLOSED_ERROR = 8 - XPATH_UNKNOWN_FUNC_ERROR = 9 - XPATH_INVALID_OPERAND = 10 - XPATH_INVALID_TYPE = 11 - XPATH_INVALID_ARITY = 12 - XPATH_INVALID_CTXT_SIZE = 13 - XPATH_INVALID_CTXT_POSITION = 14 - XPATH_MEMORY_ERROR = 15 - XPTR_SYNTAX_ERROR = 16 - XPTR_RESOURCE_ERROR = 17 - XPTR_SUB_RESOURCE_ERROR = 18 - XPATH_UNDEF_PREFIX_ERROR = 19 - XPATH_ENCODING_ERROR = 20 - XPATH_INVALID_CHAR_ERROR = 21 - XPATH_INVALID_CTXT = 22 - - ctypedef enum xmlSchemaValidError: - XML_SCHEMAS_ERR_OK = 0 - XML_SCHEMAS_ERR_NOROOT = 1 - XML_SCHEMAS_ERR_UNDECLAREDELEM = 2 - XML_SCHEMAS_ERR_NOTTOPLEVEL = 3 - XML_SCHEMAS_ERR_MISSING = 4 - XML_SCHEMAS_ERR_WRONGELEM = 5 - XML_SCHEMAS_ERR_NOTYPE = 6 - XML_SCHEMAS_ERR_NOROLLBACK = 7 - XML_SCHEMAS_ERR_ISABSTRACT = 8 - XML_SCHEMAS_ERR_NOTEMPTY = 9 - XML_SCHEMAS_ERR_ELEMCONT = 10 - 
XML_SCHEMAS_ERR_HAVEDEFAULT = 11 - XML_SCHEMAS_ERR_NOTNILLABLE = 12 - XML_SCHEMAS_ERR_EXTRACONTENT = 13 - XML_SCHEMAS_ERR_INVALIDATTR = 14 - XML_SCHEMAS_ERR_INVALIDELEM = 15 - XML_SCHEMAS_ERR_NOTDETERMINIST = 16 - XML_SCHEMAS_ERR_CONSTRUCT = 17 - XML_SCHEMAS_ERR_INTERNAL = 18 - XML_SCHEMAS_ERR_NOTSIMPLE = 19 - XML_SCHEMAS_ERR_ATTRUNKNOWN = 20 - XML_SCHEMAS_ERR_ATTRINVALID = 21 - XML_SCHEMAS_ERR_VALUE = 22 - XML_SCHEMAS_ERR_FACET = 23 - XML_SCHEMAS_ERR_ = 24 - XML_SCHEMAS_ERR_XXX = 25 - ctypedef enum xmlRelaxNGValidErr: XML_RELAXNG_OK = 0 XML_RELAXNG_ERR_MEMORY = 1 Modified: lxml/trunk/src/lxml/xmlerror.pxi ============================================================================== --- lxml/trunk/src/lxml/xmlerror.pxi (original) +++ lxml/trunk/src/lxml/xmlerror.pxi Fri Feb 15 15:15:21 2008 @@ -88,7 +88,11 @@ property type_name: def __get__(self): - return ErrorTypes._getName(self.type, "unknown") + if self.domain == ErrorDomains.RELAXNGV: + getName = RelaxNGErrorTypes._getName + else: + getName = ErrorTypes._getName + return getName(self.type, "unknown") property level_name: def __get__(self): @@ -526,8 +530,6 @@ const_defs = ((ErrorLevels, __ERROR_LEVELS), (ErrorDomains, __ERROR_DOMAINS), (ErrorTypes, __PARSER_ERROR_TYPES), - (XPathErrorTypes, __XPATH_ERROR_TYPES), - (XMLSchemaErrorTypes, __XMLSCHEMA_ERROR_TYPES), (RelaxNGErrorTypes, __RELAXNG_ERROR_TYPES)) for cls, constant_tuple in const_defs: reverse_dict = {} @@ -540,6 +542,7 @@ python.PyObject_SetAttr(cls, name, value) python.PyDict_SetItem(reverse_dict, value, name) + class ErrorLevels: "Libxml2 error levels" @@ -549,12 +552,6 @@ class ErrorTypes: "Libxml2 error types" -class XPathErrorTypes: - "Libxml2 XPath error types" - -class XMLSchemaErrorTypes: - "Libxml2 XML Schema error types" - class RelaxNGErrorTypes: "Libxml2 RelaxNG error types" @@ -1356,63 +1353,6 @@ CHECK_X=6006 """,) -cdef object __XPATH_ERROR_TYPES -__XPATH_ERROR_TYPES = ("""\ -XPATH_EXPRESSION_OK=0 -XPATH_NUMBER_ERROR=1 
-XPATH_UNFINISHED_LITERAL_ERROR=2 -XPATH_START_LITERAL_ERROR=3 -XPATH_VARIABLE_REF_ERROR=4 -XPATH_UNDEF_VARIABLE_ERROR=5 -XPATH_INVALID_PREDICATE_ERROR=6 -XPATH_EXPR_ERROR=7 -XPATH_UNCLOSED_ERROR=8 -XPATH_UNKNOWN_FUNC_ERROR=9 -XPATH_INVALID_OPERAND=10 -XPATH_INVALID_TYPE=11 -XPATH_INVALID_ARITY=12 -XPATH_INVALID_CTXT_SIZE=13 -XPATH_INVALID_CTXT_POSITION=14 -XPATH_MEMORY_ERROR=15 -XPTR_SYNTAX_ERROR=16 -XPTR_RESOURCE_ERROR=17 -XPTR_SUB_RESOURCE_ERROR=18 -XPATH_UNDEF_PREFIX_ERROR=19 -XPATH_ENCODING_ERROR=20 -XPATH_INVALID_CHAR_ERROR=21 -XPATH_INVALID_CTXT=22 -""",) - -cdef object __XMLSCHEMA_ERROR_TYPES -__XMLSCHEMA_ERROR_TYPES = ("""\ -SCHEMAS_ERR_OK=0 -SCHEMAS_ERR_NOROOT=1 -SCHEMAS_ERR_UNDECLAREDELEM=2 -SCHEMAS_ERR_NOTTOPLEVEL=3 -SCHEMAS_ERR_MISSING=4 -SCHEMAS_ERR_WRONGELEM=5 -SCHEMAS_ERR_NOTYPE=6 -SCHEMAS_ERR_NOROLLBACK=7 -SCHEMAS_ERR_ISABSTRACT=8 -SCHEMAS_ERR_NOTEMPTY=9 -SCHEMAS_ERR_ELEMCONT=10 -SCHEMAS_ERR_HAVEDEFAULT=11 -SCHEMAS_ERR_NOTNILLABLE=12 -SCHEMAS_ERR_EXTRACONTENT=13 -SCHEMAS_ERR_INVALIDATTR=14 -SCHEMAS_ERR_INVALIDELEM=15 -SCHEMAS_ERR_NOTDETERMINIST=16 -SCHEMAS_ERR_CONSTRUCT=17 -SCHEMAS_ERR_INTERNAL=18 -SCHEMAS_ERR_NOTSIMPLE=19 -SCHEMAS_ERR_ATTRUNKNOWN=20 -SCHEMAS_ERR_ATTRINVALID=21 -SCHEMAS_ERR_VALUE=22 -SCHEMAS_ERR_FACET=23 -SCHEMAS_ERR_=24 -SCHEMAS_ERR_XXX=25 -""",) - cdef object __RELAXNG_ERROR_TYPES __RELAXNG_ERROR_TYPES = ("""\ RELAXNG_OK=0 From scoder at codespeak.net Fri Feb 15 15:48:31 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 15 Feb 2008 15:48:31 +0100 (CET) Subject: [Lxml-checkins] r51541 - in lxml/trunk: . 
doc Message-ID: <20080215144831.013BB16847B@codespeak.net> Author: scoder Date: Fri Feb 15 15:48:30 2008 New Revision: 51541 Modified: lxml/trunk/ (props changed) lxml/trunk/doc/lxml-source-howto.txt Log: r3531 at delle: sbehnel | 2008-02-15 15:47:53 +0100 docs Modified: lxml/trunk/doc/lxml-source-howto.txt ============================================================================== --- lxml/trunk/doc/lxml-source-howto.txt (original) +++ lxml/trunk/doc/lxml-source-howto.txt Fri Feb 15 15:48:30 2008 @@ -45,15 +45,21 @@ structs, and then calling it. The rest of the code is just plain Python code. +Even if you are not familiar with Cython, you should keep in mind that +a slow implementation of a feature is better than none. So, if you +want to contribute and have an idea what code you want to write, feel +free to start with a pure Python implementation. Chances are, if you +get the change officially accepted and integrated, others will take +the time to optimise it so that it runs fast in Cython. + Where to start? =============== First of all, read `how to build lxml from sources` to learn how to -retrieve the source code from the Subversion repository and how to set -up a build environment. The source code lives in the subdirectory -``src`` of the checkout. The documentation (which is written in the -`ReStructured Text`_ format) lives in the ``doc`` directory. +retrieve the source code from the Subversion repository and how to +build it. The source code lives in the subdirectory ``src`` of the +checkout. The main extension modules in lxml are ``lxml.etree`` and ``lxml.objectify``. All main modules have the file extension @@ -65,14 +71,6 @@ Cython modules. -Concepts --------- - -* proxies -* naming conventions -* ... - - The documentation ----------------- @@ -82,10 +80,18 @@ * ... +Concepts +-------- + +* proxies +* naming conventions +* ... + + lxml.etree ========== -The main module, ``lxml.etree``, is in the file ``lxml.etree.pyx``. 
+The main module, ``lxml.etree``, is in the file **lxml.etree.pyx**. It implements the main functions and types of the ElementTree API, and is therefore the best place to start if you want to find out how a specific feature is implemented. @@ -93,10 +99,10 @@ At the very end of the file, it contains a series of ``include`` statements that merge the rest of the implementation into the generated C code. Yes, you read right: no importing, no source file -namespacing, just plain good old include and a huge C code result that -we throw right into the C compiler. +namespacing, just plain good old include and a huge C code result of +more than 100,000 lines that we throw right into the C compiler. -The main files are: +The main include files are: proxy.pxi: @@ -155,28 +161,45 @@ serializer.pxi: - XML output functions + XML output functions. Basically everything that creates byte + sequences from XML trees. iterparse.pxi: - incremental XML parsing + Incremental XML parsing. An iterator class that builds iterparse + events while parsing. xmlid.pxi: - XMLID and IDDict + XMLID and IDDict, a dictionary-like way to find Elements by their + XML-ID attribute. xinclude.pxi: - XInclude + XInclude implementation. extensions.pxi: - XPath/XSLT extension functions + Infrastructure for extension functions in XPath/XSLT, including + XPath value conversion and function registration. xpath.pxi: - XPath evaluation + XPath evaluators. xslt.pxi: - XSL transformations + XSL transformations, including the ``XSLT`` class, document lookup + handling and access control. + + +lxml.objectify +============== + +* ... + + +lxml.html +========= + +* ... From scoder at codespeak.net Fri Feb 15 17:32:03 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 15 Feb 2008 17:32:03 +0100 (CET) Subject: [Lxml-checkins] r51544 - in lxml/branch/lxml-2.0: . 
doc src/lxml Message-ID: <20080215163203.CB1F5168437@codespeak.net> Author: scoder Date: Fri Feb 15 17:32:03 2008 New Revision: 51544 Added: lxml/branch/lxml-2.0/doc/lxml-source-howto.txt - copied unchanged from r51543, lxml/trunk/doc/lxml-source-howto.txt Modified: lxml/branch/lxml-2.0/CHANGES.txt lxml/branch/lxml-2.0/doc/build.txt lxml/branch/lxml-2.0/doc/mkhtml.py lxml/branch/lxml-2.0/doc/objectify.txt lxml/branch/lxml-2.0/doc/pyrex.txt lxml/branch/lxml-2.0/doc/validation.txt lxml/branch/lxml-2.0/src/lxml/lxml.objectify.pyx lxml/branch/lxml-2.0/src/lxml/xmlerror.pxd lxml/branch/lxml-2.0/src/lxml/xmlerror.pxi lxml/branch/lxml-2.0/update-error-constants.py Log: trunk merge Modified: lxml/branch/lxml-2.0/CHANGES.txt ============================================================================== --- lxml/branch/lxml-2.0/CHANGES.txt (original) +++ lxml/branch/lxml-2.0/CHANGES.txt Fri Feb 15 17:32:03 2008 @@ -2,6 +2,23 @@ lxml changelog ============== +2.0.2 (Under development) +========================= + +Features added +-------------- + +Bugs fixed +---------- + +* Error type names in RelaxNG were reported incorrectly. + +* Slice deletion bug fixed in objectify. + +Other changes +------------- + + 2.0.1 (2008-02-13) ================== Modified: lxml/branch/lxml-2.0/doc/build.txt ============================================================================== --- lxml/branch/lxml-2.0/doc/build.txt (original) +++ lxml/branch/lxml-2.0/doc/build.txt Fri Feb 15 17:32:03 2008 @@ -44,10 +44,10 @@ want to be an lxml developer, then you do need a working Cython installation. You can use EasyInstall_ to install it:: - easy_install Cython==0.9.6.11b + easy_install Cython==0.9.6.12 -lxml currently requires Cython 0.9.6.11b, later versions were not -tested. +lxml currently requires Cython 0.9.6.11b or 0.9.6.12, later versions +were not tested. 
Subversion Modified: lxml/branch/lxml-2.0/doc/mkhtml.py ============================================================================== --- lxml/branch/lxml-2.0/doc/mkhtml.py (original) +++ lxml/branch/lxml-2.0/doc/mkhtml.py Fri Feb 15 17:32:03 2008 @@ -3,13 +3,14 @@ SITE_STRUCTURE = [ ('lxml', ('main.txt', 'intro.txt', 'lxml2.txt', 'FAQ.txt', - 'compatibility.txt', 'performance.txt', 'build.txt')), + 'compatibility.txt', 'performance.txt')), ('Developing with lxml', ('tutorial.txt', 'api.txt', 'parsing.txt', 'validation.txt', 'xpathxslt.txt', 'objectify.txt', 'lxmlhtml.txt', 'cssselect.txt', 'elementsoup.txt')), ('Extending lxml', ('resolvers.txt', 'extensions.txt', 'element_classes.txt', 'sax.txt', 'capi.txt')), + ('Developing lxml', ('build.txt', 'lxml-source-howto.txt')), ] RST2HTML_OPTIONS = " ".join([ Modified: lxml/branch/lxml-2.0/doc/objectify.txt ============================================================================== --- lxml/branch/lxml-2.0/doc/objectify.txt (original) +++ lxml/branch/lxml-2.0/doc/objectify.txt Fri Feb 15 17:32:03 2008 @@ -16,31 +16,6 @@ used. Python data types are extracted from XML content automatically and made available to the normal Python operators. -To set up and use ``objectify``, you need both the ``lxml.etree`` module and -``lxml.objectify``:: - - >>> from lxml import etree - >>> from lxml import objectify - -The objectify API is very different from the ElementTree API. If it -is used, it should not be mixed with other element implementations -(such as trees parsed with ``lxml.etree``), to avoid non-obvious -behaviour. - -The `benchmark page`_ has some hints on performance optimisation of code using -lxml.objectify. - -To make the doctests in this document look a little nicer, we also use this: - - >>> import lxml.usedoctest - -Imported from within a doctest, this relieves us from caring about the exact -formatting of XML output. - -.. _Amara: http://uche.ogbuji.net/tech/4suite/amara/ -.. 
_gnosis.xml.objectify: http://gnosis.cx/download/ -.. _`benchmark page`: performance.html#lxml-objectify - .. contents:: .. 1 The lxml.objectify API @@ -61,6 +36,33 @@ 5.5 Advanced element class lookup 6 What is different from lxml.etree? +.. _Amara: http://uche.ogbuji.net/tech/4suite/amara/ +.. _gnosis.xml.objectify: http://gnosis.cx/download/ +.. _`benchmark page`: performance.html#lxml-objectify + + +To set up and use ``objectify``, you need both the ``lxml.etree`` +module and ``lxml.objectify``:: + + >>> from lxml import etree + >>> from lxml import objectify + +The objectify API is very different from the ElementTree API. If it +is used, it should not be mixed with other element implementations +(such as trees parsed with ``lxml.etree``), to avoid non-obvious +behaviour. + +The `benchmark page`_ has some hints on performance optimisation of +code using lxml.objectify. + +To make the doctests in this document look a little nicer, we also use +this:: + + >>> import lxml.usedoctest + +Imported from within a doctest, this relieves us from caring about the exact +formatting of XML output. + The lxml.objectify API ====================== Modified: lxml/branch/lxml-2.0/doc/pyrex.txt ============================================================================== --- lxml/branch/lxml-2.0/doc/pyrex.txt (original) +++ lxml/branch/lxml-2.0/doc/pyrex.txt Fri Feb 15 17:32:03 2008 @@ -22,4 +22,4 @@ clear description of what you did to run into the problems and provide the compiler output that shows the error. -.. Pyrex_: http://www.cosc.canterbury.ac.nz/~greg/python/Pyrex/ +.. 
_Pyrex: http://www.cosc.canterbury.ac.nz/~greg/python/Pyrex/ Modified: lxml/branch/lxml-2.0/doc/validation.txt ============================================================================== --- lxml/branch/lxml-2.0/doc/validation.txt (original) +++ lxml/branch/lxml-2.0/doc/validation.txt Fri Feb 15 17:32:03 2008 @@ -182,14 +182,23 @@ >>> log = relaxng.error_log >>> print log.last_error - :1:0:ERROR:RELAXNGV:ERR_LT_IN_ATTRIBUTE: Did not expect element c there + :1:0:ERROR:RELAXNGV:RELAXNG_ERR_ELEMWRONG: Did not expect element c there You can see that the error (ERROR) happened during RelaxNG validation -(RELAXNGV). The message then tells you what went wrong. Note that this error -log is local to the RelaxNG object. It will only contain log entries that -appeared during the validation. The DocumentInvalid exception raised by the -``assertValid`` method above provides access to the global error log (like all -other lxml exceptions). +(RELAXNGV). The message then tells you what went wrong. You can also +look at the error domain and its type directly:: + + >>> error = log.last_error + >>> print error.domain_name + RELAXNGV + >>> print error.type_name + RELAXNG_ERR_ELEMWRONG + +Note that this error log is local to the RelaxNG object. It will only +contain log entries that appeared during the validation. The +DocumentInvalid exception raised by the ``assertValid`` method above +provides access to the global error log (like all other lxml +exceptions). 
Similar to XSLT, there's also a less efficient but easier shortcut method to do one-shot RelaxNG validation:: Modified: lxml/branch/lxml-2.0/src/lxml/lxml.objectify.pyx ============================================================================== --- lxml/branch/lxml-2.0/src/lxml/lxml.objectify.pyx (original) +++ lxml/branch/lxml-2.0/src/lxml/lxml.objectify.pyx Fri Feb 15 17:32:03 2008 @@ -317,26 +317,17 @@ def __delitem__(self, key): cdef Py_ssize_t start, stop, step, slicelength + parent = self.getparent() + if parent is None: + raise TypeError("deleting items not supported by root element") if python.PySlice_Check(key): # slice deletion - python.PySlice_GetIndicesEx( - key, _countSiblings(self._c_node), - &start, &stop, &step, &slicelength) - parent = self.getparent() - if parent is None: - raise TypeError("deleting slices of root element not supported") - if step < 0: - del_items = list(self)[start:stop:step] - else: - del_items = list(islice(self, start, stop, step)) + del_items = list(self)[key] remove = parent.remove for el in del_items: remove(el) else: # normal index deletion - parent = self.getparent() - if parent is None: - raise TypeError("deleting items not supported by root element") sibling = self.__getitem__(key) parent.remove(sibling) Modified: lxml/branch/lxml-2.0/src/lxml/xmlerror.pxd ============================================================================== --- lxml/branch/lxml-2.0/src/lxml/xmlerror.pxd (original) +++ lxml/branch/lxml-2.0/src/lxml/xmlerror.pxd Fri Feb 15 17:32:03 2008 @@ -767,6 +767,48 @@ XML_I18N_NO_OUTPUT = 6004 # 6004 XML_CHECK_ = 6005 # 5033 XML_CHECK_X = 6006 # 503 + + ctypedef enum xmlRelaxNGValidErr: + XML_RELAXNG_OK = 0 + XML_RELAXNG_ERR_MEMORY = 1 + XML_RELAXNG_ERR_TYPE = 2 + XML_RELAXNG_ERR_TYPEVAL = 3 + XML_RELAXNG_ERR_DUPID = 4 + XML_RELAXNG_ERR_TYPECMP = 5 + XML_RELAXNG_ERR_NOSTATE = 6 + XML_RELAXNG_ERR_NODEFINE = 7 + XML_RELAXNG_ERR_LISTEXTRA = 8 + XML_RELAXNG_ERR_LISTEMPTY = 9 + XML_RELAXNG_ERR_INTERNODATA = 
10 + XML_RELAXNG_ERR_INTERSEQ = 11 + XML_RELAXNG_ERR_INTEREXTRA = 12 + XML_RELAXNG_ERR_ELEMNAME = 13 + XML_RELAXNG_ERR_ATTRNAME = 14 + XML_RELAXNG_ERR_ELEMNONS = 15 + XML_RELAXNG_ERR_ATTRNONS = 16 + XML_RELAXNG_ERR_ELEMWRONGNS = 17 + XML_RELAXNG_ERR_ATTRWRONGNS = 18 + XML_RELAXNG_ERR_ELEMEXTRANS = 19 + XML_RELAXNG_ERR_ATTREXTRANS = 20 + XML_RELAXNG_ERR_ELEMNOTEMPTY = 21 + XML_RELAXNG_ERR_NOELEM = 22 + XML_RELAXNG_ERR_NOTELEM = 23 + XML_RELAXNG_ERR_ATTRVALID = 24 + XML_RELAXNG_ERR_CONTENTVALID = 25 + XML_RELAXNG_ERR_EXTRACONTENT = 26 + XML_RELAXNG_ERR_INVALIDATTR = 27 + XML_RELAXNG_ERR_DATAELEM = 28 + XML_RELAXNG_ERR_VALELEM = 29 + XML_RELAXNG_ERR_LISTELEM = 30 + XML_RELAXNG_ERR_DATATYPE = 31 + XML_RELAXNG_ERR_VALUE = 32 + XML_RELAXNG_ERR_LIST = 33 + XML_RELAXNG_ERR_NOGRAMMAR = 34 + XML_RELAXNG_ERR_EXTRADATA = 35 + XML_RELAXNG_ERR_LACKDATA = 36 + XML_RELAXNG_ERR_INTERNAL = 37 + XML_RELAXNG_ERR_ELEMWRONG = 38 + XML_RELAXNG_ERR_TEXTWRONG = 39 # --- END: GENERATED CONSTANTS --- cdef extern from "libxml/xmlerror.h": Modified: lxml/branch/lxml-2.0/src/lxml/xmlerror.pxi ============================================================================== --- lxml/branch/lxml-2.0/src/lxml/xmlerror.pxi (original) +++ lxml/branch/lxml-2.0/src/lxml/xmlerror.pxi Fri Feb 15 17:32:03 2008 @@ -88,7 +88,11 @@ property type_name: def __get__(self): - return ErrorTypes._getName(self.type, "unknown") + if self.domain == ErrorDomains.RELAXNGV: + getName = RelaxNGErrorTypes._getName + else: + getName = ErrorTypes._getName + return getName(self.type, "unknown") property level_name: def __get__(self): @@ -523,9 +527,10 @@ cdef void __initErrorConstants(): "Called at setup time to parse the constants and build the classes below." 
find_constants = re.compile(r"\s*([a-zA-Z0-9_]+)\s*=\s*([0-9]+)").findall - const_defs = ((ErrorLevels, __ERROR_LEVELS), - (ErrorDomains, __ERROR_DOMAINS), - (ErrorTypes, __ERROR_TYPES)) + const_defs = ((ErrorLevels, __ERROR_LEVELS), + (ErrorDomains, __ERROR_DOMAINS), + (ErrorTypes, __PARSER_ERROR_TYPES), + (RelaxNGErrorTypes, __RELAXNG_ERROR_TYPES)) for cls, constant_tuple in const_defs: reverse_dict = {} cls._names = reverse_dict @@ -537,6 +542,7 @@ python.PyObject_SetAttr(cls, name, value) python.PyDict_SetItem(reverse_dict, value, name) + class ErrorLevels: "Libxml2 error levels" @@ -546,6 +552,9 @@ class ErrorTypes: "Libxml2 error types" +class RelaxNGErrorTypes: + "Libxml2 RelaxNG error types" + # --- BEGIN: GENERATED CONSTANTS --- # This section is generated by the script 'update-error-constants.py'. @@ -596,8 +605,8 @@ I18N=27 """,) -cdef object __ERROR_TYPES -__ERROR_TYPES = ("""\ +cdef object __PARSER_ERROR_TYPES +__PARSER_ERROR_TYPES = ("""\ ERR_OK=0 ERR_INTERNAL_ERROR=1 ERR_NO_MEMORY=2 @@ -1343,6 +1352,50 @@ CHECK_=6005 CHECK_X=6006 """,) + +cdef object __RELAXNG_ERROR_TYPES +__RELAXNG_ERROR_TYPES = ("""\ +RELAXNG_OK=0 +RELAXNG_ERR_MEMORY=1 +RELAXNG_ERR_TYPE=2 +RELAXNG_ERR_TYPEVAL=3 +RELAXNG_ERR_DUPID=4 +RELAXNG_ERR_TYPECMP=5 +RELAXNG_ERR_NOSTATE=6 +RELAXNG_ERR_NODEFINE=7 +RELAXNG_ERR_LISTEXTRA=8 +RELAXNG_ERR_LISTEMPTY=9 +RELAXNG_ERR_INTERNODATA=10 +RELAXNG_ERR_INTERSEQ=11 +RELAXNG_ERR_INTEREXTRA=12 +RELAXNG_ERR_ELEMNAME=13 +RELAXNG_ERR_ATTRNAME=14 +RELAXNG_ERR_ELEMNONS=15 +RELAXNG_ERR_ATTRNONS=16 +RELAXNG_ERR_ELEMWRONGNS=17 +RELAXNG_ERR_ATTRWRONGNS=18 +RELAXNG_ERR_ELEMEXTRANS=19 +RELAXNG_ERR_ATTREXTRANS=20 +RELAXNG_ERR_ELEMNOTEMPTY=21 +RELAXNG_ERR_NOELEM=22 +RELAXNG_ERR_NOTELEM=23 +RELAXNG_ERR_ATTRVALID=24 +RELAXNG_ERR_CONTENTVALID=25 +RELAXNG_ERR_EXTRACONTENT=26 +RELAXNG_ERR_INVALIDATTR=27 +RELAXNG_ERR_DATAELEM=28 +RELAXNG_ERR_VALELEM=29 +RELAXNG_ERR_LISTELEM=30 +RELAXNG_ERR_DATATYPE=31 +RELAXNG_ERR_VALUE=32 +RELAXNG_ERR_LIST=33 
+RELAXNG_ERR_NOGRAMMAR=34 +RELAXNG_ERR_EXTRADATA=35 +RELAXNG_ERR_LACKDATA=36 +RELAXNG_ERR_INTERNAL=37 +RELAXNG_ERR_ELEMWRONG=38 +RELAXNG_ERR_TEXTWRONG=39 +""",) # --- END: GENERATED CONSTANTS --- __initErrorConstants() Modified: lxml/branch/lxml-2.0/update-error-constants.py ============================================================================== --- lxml/branch/lxml-2.0/update-error-constants.py (original) +++ lxml/branch/lxml-2.0/update-error-constants.py Fri Feb 15 17:32:03 2008 @@ -11,20 +11,29 @@ print sys.argv[0], "/path/to/libxml2-doc-dir" sys.exit(len(sys.argv) > 1) -HTML_FILE = os.path.join(sys.argv[1], 'html', 'libxml-xmlerror.html') -os.stat(HTML_FILE) # raise an error if we can't find it +HTML_DIR = os.path.join(sys.argv[1], 'html') +os.stat(HTML_DIR) # raise an error if we can't find it sys.path.insert(0, 'src') from lxml import etree # map enum name to Python variable name and alignment for constant name ENUM_MAP = { - 'xmlErrorLevel' : ('__ERROR_LEVELS', 'XML_ERR_'), - 'xmlErrorDomain' : ('__ERROR_DOMAINS', 'XML_FROM_'), - 'xmlParserErrors' : ('__ERROR_TYPES', 'XML_') + 'xmlErrorLevel' : ('__ERROR_LEVELS', 'XML_ERR_'), + 'xmlErrorDomain' : ('__ERROR_DOMAINS', 'XML_FROM_'), + 'xmlParserErrors' : ('__PARSER_ERROR_TYPES', 'XML_'), +# 'xmlXPathError' : ('__XPATH_ERROR_TYPES', ''), +# 'xmlSchemaValidError' : ('__XMLSCHEMA_ERROR_TYPES', 'XML_'), + 'xmlRelaxNGValidErr' : ('__RELAXNG_ERROR_TYPES', 'XML_'), } -ENUM_ORDER = ('xmlErrorLevel', 'xmlErrorDomain', 'xmlParserErrors') +ENUM_ORDER = ( + 'xmlErrorLevel', + 'xmlErrorDomain', + 'xmlParserErrors', +# 'xmlXPathError', +# 'xmlSchemaValidError', + 'xmlRelaxNGValidErr') COMMENT = """ # This section is generated by the script '%s'. 
@@ -61,27 +70,43 @@ f.write(''.join(post)) f.close() -def parse_enums(html_file): +collect_text = etree.XPath("string()") +find_enums = etree.XPath( + "//html:pre[@class = 'programlisting' and contains(text(), 'Enum')]", + namespaces = {'html' : 'http://www.w3.org/1999/xhtml'}) + +def parse_enums(html_dir, html_filename, enum_dict): PARSE_ENUM_NAME = re.compile('\s*enum\s+(\w+)\s*{', re.I).match PARSE_ENUM_VALUE = re.compile('\s*=\s+([0-9]+)\s*(?::\s*(.*))?').match - tree = etree.parse(html_file) - xpath = etree.XPathEvaluator( - tree, namespaces={'html' : 'http://www.w3.org/1999/xhtml'}) - - enum_dict = {} - enums = xpath.evaluate("//html:pre[@class = 'programlisting' and contains(text(), 'Enum') and html:a[@name]]") + tree = etree.parse(os.path.join(html_dir, html_filename)) + enums = find_enums(tree) for enum in enums: - enum_name = PARSE_ENUM_NAME(enum.text).group(1) + enum_name = PARSE_ENUM_NAME(collect_text(enum)) + if not enum_name: + continue + enum_name = enum_name.group(1) + if enum_name not in ENUM_MAP: + continue print "Found enum", enum_name entries = [] - enum_dict[enum_name] = entries for child in enum: name = child.text - value, descr = PARSE_ENUM_VALUE(child.tail).groups() + match = PARSE_ENUM_VALUE(child.tail) + if not match: + print("Ignoring enum %s (failed to parse field '%s')" % ( + enum_name, name)) + break + value, descr = match.groups() entries.append((name, int(value), descr)) + else: + enum_dict[enum_name] = entries return enum_dict -enum_dict = parse_enums(HTML_FILE) +enum_dict = {} +parse_enums(HTML_DIR, 'libxml-xmlerror.html', enum_dict) +#parse_enums(HTML_DIR, 'libxml-xpath.html', enum_dict) +#parse_enums(HTML_DIR, 'libxml-xmlschemas.html', enum_dict) +parse_enums(HTML_DIR, 'libxml-relaxng.html', enum_dict) # regenerate source files pxi_result = [] From scoder at codespeak.net Mon Feb 18 11:15:01 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Mon, 18 Feb 2008 11:15:01 +0100 (CET) Subject: [Lxml-checkins] r51574 - 
in lxml/trunk: . doc Message-ID: <20080218101501.4F32C16842D@codespeak.net> Author: scoder Date: Mon Feb 18 11:15:00 2008 New Revision: 51574 Modified: lxml/trunk/ (props changed) lxml/trunk/doc/lxml-source-howto.txt Log: r3535 at delle: sbehnel | 2008-02-15 18:56:52 +0100 source howto update Modified: lxml/trunk/doc/lxml-source-howto.txt ============================================================================== --- lxml/trunk/doc/lxml-source-howto.txt (original) +++ lxml/trunk/doc/lxml-source-howto.txt Mon Feb 18 11:15:00 2008 @@ -16,9 +16,19 @@ .. _lxml: http://codespeak.net/lxml .. _`how to build lxml from sources`: build.html .. _`ReStructured Text`: http://docutils.sourceforge.net/rst.html +.. _epydoc: http://epydoc.sourceforge.net/ +.. _docutils: http://docutils.sourceforge.net/ +.. _`C-level API`: capi.html .. contents:: .. + 1 What is Cython? + 2 Where to start? + 2.1 Concepts + 2.2 The documentation + 3 lxml.etree + 4 lxml.objectify + 5 lxml.html What is Cython? @@ -29,21 +39,23 @@ Cython_ is the language that lxml is written in. It is a very Python-like language that was specifically designed for writing Python -extension modules. The language is so close to Python that the Cython -compiler can actually compile many, many Python programs to C without -major modifications. But the real speed gains of a C compilation come -from type annotations that were added to the language and that allow -Cython to generate very efficient C code. +extension modules. The reason why Cython (or actually its predecessor Pyrex_ at the time) was chosen as an implementation language for lxml, is that it makes it very easy to interface with both the Python world and external C code. Cython generates all the necessary glue code for the Python API, -including Python types and reference counting for Python objects. -Calling into C code is not more than declaring the signature of the -function and maybe some variables as being C types, pointers or -structs, and then calling it. 
The rest of the code is just plain -Python code. +including Python types, calling conventions and reference counting. +On the other side of the table, calling into C code is not more than +declaring the signature of the function and maybe some variables as +being C types, pointers or structs, and then calling it. The rest of +the code is just plain Python code. + +The Cython language is so close to Python that the Cython compiler can +actually compile many, many Python programs to C without major +modifications. But the real speed gains of a C compilation come from +type annotations that were added to the language and that allow Cython +to generate very efficient C code. Even if you are not familiar with Cython, you should keep in mind that a slow implementation of a feature is better than none. So, if you @@ -56,7 +68,7 @@ Where to start? =============== -First of all, read `how to build lxml from sources` to learn how to +First of all, read `how to build lxml from sources`_ to learn how to retrieve the source code from the Subversion repository and how to build it. The source code lives in the subdirectory ``src`` of the checkout. @@ -65,21 +77,12 @@ ``lxml.objectify``. All main modules have the file extension ``.pyx``, which shows the descendence from Pyrex. As usual in Python, the main files start with a short description and a couple of imports. -Cython destinguishes between the run-time ``import`` statement (as +Cython distinguishes between the run-time ``import`` statement (as known from Python) and the compile-time ``cimport`` statement, which imports C declarations, either from external libraries or from other Cython modules. -The documentation ------------------ - -* docs in ``doc`` directory -* `ReStructured Text`_ format -* generated through ``mkhtml.py`` script -* ... - - Concepts -------- @@ -88,6 +91,38 @@ * ... +The documentation +----------------- + +An important part of lxml is the documentation that lives in the +``doc`` directory. 
It describes a large part of the API and comprises +a lot of example code in the form of doctests. + +The documentation is written in the `ReStructured Text`_ format, a +very powerful text markup language that looks almost like plain text. +It is part of the docutils_ package. + +The project web site of lxml_ is completely generated from these text +documents. Even the side menu is just collected from the table of +contents that the ReST processor writes into each HTML page. +Obviously, we use lxml for this. + +The easiest way to generate the HTML pages is by calling:: + + make html + +This will call the script ``doc/mkhtml.py`` to run the ReST processor +on the files. After generating an HTML page the script parses it back +in to build the side menu, and injects the complete menu into each +page at the very end. + +Running the ``make`` command will also generate the API documentation +if you have epydoc_ installed. The epydoc package will import and +introspect the extension modules and also introspect and parse the +Python modules of lxml. The aggregated information will then be +written out into an HTML documentation site. + + lxml.etree ========== @@ -104,14 +139,7 @@ The main include files are: -proxy.pxi: - - Very low-level functions for memory allocation/deallocation - and Element proxy handling. Ignoring this for the beginning - will keep your head from exploding. - -apihelpers.pxi: - +apihelpers.pxi Private C helper functions. Most of the little functions that are used all over the place are defined here. This includes things like reading out the text content of a libxml2 tree node, checking @@ -120,77 +148,100 @@ should keep these functions in the back of your head, as they will definitely make your life easier. -xmlerror.pxi: - - Error log handling. All error messages that libxml2 generates - internally walk through the code in this file to end up in lxml's - Python level error logs. 
- - At the end of the file, you will find a long list of named error - codes. It is generated from the libxml2 HTML documentation (using - lxml, of course). See the script ``update-error-constants.py`` - for this. - -classlookup.pxi: - +classlookup.pxi Element class lookup mechanisms. The main API and engines for those who want to define custom Element classes and inject them into lxml. -nsclasses.pxi: +docloader.pxi + Support for custom document loaders. Base class and registry for + custom document resolvers. + +extensions.pxi + Infrastructure for extension functions in XPath/XSLT, including + XPath value conversion and function registration. +iterparse.pxi + Incremental XML parsing. An iterator class that builds iterparse + events while parsing. + +nsclasses.pxi Namespace implementation and registry. The registry and engine for Element classes that use the ElementNamespaceClassLookup scheme. -docloader.pxi: - - Support for custom document loaders. Base class and registry for - custom document resolvers. - -parser.pxi: - +parser.pxi Parsers for XML and HTML. This is the main parser engine. It's the reason why you can parse a document from various sources in two lines of Python code. It's definitely not the right place to start reading lxml's source code. -parsertarget.pxi: +parsertarget.pxi + An ElementTree compatible parser target implementation based on + the SAX2 interface of libxml2. - ET Parser target. +proxy.pxi + Very low-level functions for memory allocation/deallocation + and Element proxy handling. Ignoring this for the beginning + will save your head from exploding. -serializer.pxi: +public-api.pxi + The set of C functions that are exported to other extension + modules at the C level. For example, ``lxml.objectify`` makes use + of these. See the `C-level API`_ documentation. +serializer.pxi XML output functions. Basically everything that creates byte sequences from XML trees. -iterparse.pxi: +xinclude.pxi + XInclude implementation.
- Incremental XML parsing. An iterator class that builds iterparse - events while parsing. +xmlerror.pxi + Error log handling. All error messages that libxml2 generates + internally walk through the code in this file to end up in lxml's + Python level error logs. -xmlid.pxi: + At the end of the file, you will find a long list of named error + codes. It is generated from the libxml2 HTML documentation (using + lxml, of course). See the script ``update-error-constants.py`` + for this. +xmlid.pxi XMLID and IDDict, a dictionary-like way to find Elements by their XML-ID attribute. -xinclude.pxi: +xpath.pxi + XPath evaluators. - XInclude implementation. +xslt.pxi + XSL transformations, including the ``XSLT`` class, document lookup + handling and access control. -extensions.pxi: +The different schema languages (DTD, RelaxNG, XML Schema and +Schematron) are implemented in the following include files: - Infrastructure for extension functions in XPath/XSLT, including - XPath value conversion and function registration. +* dtd.pxi +* relaxng.pxi +* schematron.pxi +* xmlschema.pxi -xpath.pxi: - XPath evaluators. +Python modules +============== -xslt.pxi: +The ``lxml`` package also contains a number of pure Python modules: - XSL transformations, including the ``XSLT`` class, document lookup - handling and access control. +builder.py + The E-factory and the ElementBuilder class. These provide a + simple interface to XML tree generation. + +cssselect.py + A CSS selector implementation based on XPath. The main class is + called ``CSSSelector``. + +doctestcompare.py + lxml.objectify From scoder at codespeak.net Mon Feb 18 11:15:06 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Mon, 18 Feb 2008 11:15:06 +0100 (CET) Subject: [Lxml-checkins] r51575 - in lxml/trunk: . 
doc Message-ID: <20080218101506.1BBDA1684CD@codespeak.net> Author: scoder Date: Mon Feb 18 11:15:05 2008 New Revision: 51575 Modified: lxml/trunk/ (props changed) lxml/trunk/doc/lxml-source-howto.txt Log: r3536 at delle: sbehnel | 2008-02-18 10:34:18 +0100 source doc Modified: lxml/trunk/doc/lxml-source-howto.txt ============================================================================== --- lxml/trunk/doc/lxml-source-howto.txt (original) +++ lxml/trunk/doc/lxml-source-howto.txt Mon Feb 18 11:15:05 2008 @@ -241,7 +241,19 @@ called ``CSSSelector``. doctestcompare.py - + ... + +ElementInclude.py + ... + +_elementpath.py + ... + +sax.py + ... + +usedoctest.py + ... lxml.objectify From scoder at codespeak.net Mon Feb 18 11:19:44 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Mon, 18 Feb 2008 11:19:44 +0100 (CET) Subject: [Lxml-checkins] r51576 - lxml/branch/lxml-2.0/doc Message-ID: <20080218101944.0981616842D@codespeak.net> Author: scoder Date: Mon Feb 18 11:19:44 2008 New Revision: 51576 Modified: lxml/branch/lxml-2.0/doc/lxml-source-howto.txt Log: trunk merge Modified: lxml/branch/lxml-2.0/doc/lxml-source-howto.txt ============================================================================== --- lxml/branch/lxml-2.0/doc/lxml-source-howto.txt (original) +++ lxml/branch/lxml-2.0/doc/lxml-source-howto.txt Mon Feb 18 11:19:44 2008 @@ -16,9 +16,19 @@ .. _lxml: http://codespeak.net/lxml .. _`how to build lxml from sources`: build.html .. _`ReStructured Text`: http://docutils.sourceforge.net/rst.html +.. _epydoc: http://epydoc.sourceforge.net/ +.. _docutils: http://docutils.sourceforge.net/ +.. _`C-level API`: capi.html .. contents:: .. + 1 What is Cython? + 2 Where to start? + 2.1 Concepts + 2.2 The documentation + 3 lxml.etree + 4 lxml.objectify + 5 lxml.html What is Cython? @@ -29,21 +39,23 @@ Cython_ is the language that lxml is written in. It is a very Python-like language that was specifically designed for writing Python -extension modules. 
The language is so close to Python that the Cython -compiler can actually compile many, many Python programs to C without -major modifications. But the real speed gains of a C compilation come -from type annotations that were added to the language and that allow -Cython to generate very efficient C code. +extension modules. The reason why Cython (or actually its predecessor Pyrex_ at the time) was chosen as an implementation language for lxml, is that it makes it very easy to interface with both the Python world and external C code. Cython generates all the necessary glue code for the Python API, -including Python types and reference counting for Python objects. -Calling into C code is not more than declaring the signature of the -function and maybe some variables as being C types, pointers or -structs, and then calling it. The rest of the code is just plain -Python code. +including Python types, calling conventions and reference counting. +On the other side of the table, calling into C code is not more than +declaring the signature of the function and maybe some variables as +being C types, pointers or structs, and then calling it. The rest of +the code is just plain Python code. + +The Cython language is so close to Python that the Cython compiler can +actually compile many, many Python programs to C without major +modifications. But the real speed gains of a C compilation come from +type annotations that were added to the language and that allow Cython +to generate very efficient C code. Even if you are not familiar with Cython, you should keep in mind that a slow implementation of a feature is better than none. So, if you @@ -56,7 +68,7 @@ Where to start? =============== -First of all, read `how to build lxml from sources` to learn how to +First of all, read `how to build lxml from sources`_ to learn how to retrieve the source code from the Subversion repository and how to build it. The source code lives in the subdirectory ``src`` of the checkout. 
@@ -65,21 +77,12 @@ ``lxml.objectify``. All main modules have the file extension ``.pyx``, which shows the descendence from Pyrex. As usual in Python, the main files start with a short description and a couple of imports. -Cython destinguishes between the run-time ``import`` statement (as +Cython distinguishes between the run-time ``import`` statement (as known from Python) and the compile-time ``cimport`` statement, which imports C declarations, either from external libraries or from other Cython modules. -The documentation ------------------ - -* docs in ``doc`` directory -* `ReStructured Text`_ format -* generated through ``mkhtml.py`` script -* ... - - Concepts -------- @@ -88,6 +91,38 @@ * ... +The documentation +----------------- + +An important part of lxml is the documentation that lives in the +``doc`` directory. It describes a large part of the API and comprises +a lot of example code in the form of doctests. + +The documentation is written in the `ReStructured Text`_ format, a +very powerful text markup language that looks almost like plain text. +It is part of the docutils_ package. + +The project web site of lxml_ is completely generated from these text +documents. Even the side menu is just collected from the table of +contents that the ReST processor writes into each HTML page. +Obviously, we use lxml for this. + +The easiest way to generate the HTML pages is by calling:: + + make html + +This will call the script ``doc/mkhtml.py`` to run the ReST processor +on the files. After generating an HTML page the script parses it back +in to build the side menu, and injects the complete menu into each +page at the very end. + +Running the ``make`` command will also generate the API documentation +if you have epydoc_ installed. The epydoc package will import and +introspect the extension modules and also introspect and parse the +Python modules of lxml. The aggregated information will then be +written out into an HTML documentation site. 
+ + lxml.etree ========== @@ -104,14 +139,7 @@ The main include files are: -proxy.pxi: - - Very low-level functions for memory allocation/deallocation - and Element proxy handling. Ignoring this for the beginning - will keep your head from exploding. - -apihelpers.pxi: - +apihelpers.pxi Private C helper functions. Most of the little functions that are used all over the place are defined here. This includes things like reading out the text content of a libxml2 tree node, checking @@ -120,77 +148,112 @@ should keep these functions in the back of your head, as they will definitely make your life easier. -xmlerror.pxi: - - Error log handling. All error messages that libxml2 generates - internally walk through the code in this file to end up in lxml's - Python level error logs. - - At the end of the file, you will find a long list of named error - codes. It is generated from the libxml2 HTML documentation (using - lxml, of course). See the script ``update-error-constants.py`` - for this. - -classlookup.pxi: - +classlookup.pxi Element class lookup mechanisms. The main API and engines for those who want to define custom Element classes and inject them into lxml. -nsclasses.pxi: +docloader.pxi + Support for custom document loaders. Base class and registry for + custom document resolvers. + +extensions.pxi + Infrastructure for extension functions in XPath/XSLT, including + XPath value conversion and function registration. + +iterparse.pxi + Incremental XML parsing. An iterator class that builds iterparse + events while parsing. +nsclasses.pxi Namespace implementation and registry. The registry and engine for Element classes that use the ElementNamespaceClassLookup scheme. -docloader.pxi: - - Support for custom document loaders. Base class and registry for - custom document resolvers. - -parser.pxi: - +parser.pxi Parsers for XML and HTML. This is the main parser engine. It's the reason why you can parse a document from various sources in two lines of Python code. 
It's definitely not the right place to start reading lxml's source code. -parsertarget.pxi: +parsertarget.pxi + An ElementTree compatible parser target implementation based on + the SAX2 interface of libxml2. - ET Parser target. +proxy.pxi + Very low-level functions for memory allocation/deallocation + and Element proxy handling. Ignoring this for the beginning + will save your head from exploding. -serializer.pxi: +public-api.pxi + The set of C functions that are exported to other extension + modules at the C level. For example, ``lxml.objectify`` makes use + of these. See the `C-level API`_ documentation. +serializer.pxi XML output functions. Basically everything that creates byte sequences from XML trees. -iterparse.pxi: +xinclude.pxi + XInclude implementation. - Incremental XML parsing. An iterator class that builds iterparse - events while parsing. +xmlerror.pxi + Error log handling. All error messages that libxml2 generates + internally walk through the code in this file to end up in lxml's + Python level error logs. -xmlid.pxi: + At the end of the file, you will find a long list of named error + codes. It is generated from the libxml2 HTML documentation (using + lxml, of course). See the script ``update-error-constants.py`` + for this. +xmlid.pxi XMLID and IDDict, a dictionary-like way to find Elements by their XML-ID attribute. -xinclude.pxi: +xpath.pxi + XPath evaluators. - XInclude implementation. +xslt.pxi + XSL transformations, including the ``XSLT`` class, document lookup + handling and access control. -extensions.pxi: +The different schema languages (DTD, RelaxNG, XML Schema and +Schematron) are implemented in the following include files: - Infrastructure for extension functions in XPath/XSLT, including - XPath value conversion and function registration. +* dtd.pxi +* relaxng.pxi +* schematron.pxi +* xmlschema.pxi -xpath.pxi: - XPath evaluators.
+Python modules +============== -xslt.pxi: +The ``lxml`` package also contains a number of pure Python modules: - XSL transformations, including the ``XSLT`` class, document lookup - handling and access control. +builder.py + The E-factory and the ElementBuilder class. These provide a + simple interface to XML tree generation. + +cssselect.py + A CSS selector implementation based on XPath. The main class is + called ``CSSSelector``. + +doctestcompare.py + ... + +ElementInclude.py + ... + +_elementpath.py + ... + +sax.py + ... + +usedoctest.py + ... lxml.objectify From ianb at codespeak.net Mon Feb 18 18:44:12 2008 From: ianb at codespeak.net (ianb at codespeak.net) Date: Mon, 18 Feb 2008 18:44:12 +0100 (CET) Subject: [Lxml-checkins] r51601 - lxml/trunk/src/lxml/html Message-ID: <20080218174412.C933D1683DD@codespeak.net> Author: ianb Date: Mon Feb 18 18:44:09 2008 New Revision: 51601 Modified: lxml/trunk/src/lxml/html/__init__.py Log: Make getter/setter methods private Modified: lxml/trunk/src/lxml/html/__init__.py ============================================================================== --- lxml/trunk/src/lxml/html/__init__.py (original) +++ lxml/trunk/src/lxml/html/__init__.py Mon Feb 18 18:44:09 2008 @@ -67,7 +67,7 @@ return self.xpath('//head')[0] head = property(head, doc=head.__doc__) - def label__get(self): + def _label__get(self): """ Get or set any