From scoder at codespeak.net Sat Feb 10 18:36:08 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sat, 10 Feb 2007 18:36:08 +0100 (CET) Subject: [Lxml-checkins] r38403 - in lxml/trunk: . src/lxml src/lxml/tests Message-ID: <20070210173608.528F7100A5@code0.codespeak.net> Author: scoder Date: Sat Feb 10 18:36:06 2007 New Revision: 38403 Modified: lxml/trunk/CHANGES.txt lxml/trunk/src/lxml/tests/test_xslt.py lxml/trunk/src/lxml/xslt.pxi Log: get()/set() methods for PI elements Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Sat Feb 10 18:36:06 2007 @@ -8,8 +8,10 @@ Features added -------------- -* ElementInclude module for ElementTree compatible XInclude processing that - honours custom resolvers registered with the source document +* get/set emulation (not .attrib!) for attributes on processing instructions + +* ElementInclude Python module for ElementTree compatible XInclude processing + that honours custom resolvers registered with the source document * ElementTree.parser property holds the parser used to parse the document Modified: lxml/trunk/src/lxml/tests/test_xslt.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_xslt.py (original) +++ lxml/trunk/src/lxml/tests/test_xslt.py Sat Feb 10 18:36:06 2007 @@ -599,7 +599,7 @@ style_root = tree.getroot().getprevious().parseXSL().getroot() self.assertEquals("{http://www.w3.org/1999/XSL/Transform}stylesheet", style_root.tag) - + def test_xslt_pi_embedded_xmlid(self): # test xml:id dictionary lookup mechanism tree = self.parse('''\ @@ -628,7 +628,7 @@ B ''', st.tostring(res)) - + def test_xslt_pi_embedded_id(self): # test XPath lookup mechanism tree = self.parse('''\ @@ -663,6 +663,88 @@ ''', st.tostring(res)) + def test_xslt_pi_get(self): + tree = self.parse('''\ + + + + B + C +''') + + pi = 
tree.getroot().getprevious() + self.assertEquals("TEST", pi.get("href")) + + def test_xslt_pi_get_all(self): + tree = self.parse('''\ + + + + B + C +''') + + pi = tree.getroot().getprevious() + self.assertEquals("TEST", pi.get("href")) + self.assertEquals("text/xsl", pi.get("type")) + self.assertEquals(None, pi.get("motz")) + + def test_xslt_pi_get_all_reversed(self): + tree = self.parse('''\ + + + + B + C +''') + + pi = tree.getroot().getprevious() + self.assertEquals("TEST", pi.get("href")) + self.assertEquals("text/xsl", pi.get("type")) + self.assertEquals(None, pi.get("motz")) + + def test_xslt_pi_get_unknown(self): + tree = self.parse('''\ + + + + B + C +''') + + pi = tree.getroot().getprevious() + self.assertEquals(None, pi.get("unknownattribute")) + + def test_xslt_pi_set_replace(self): + tree = self.parse('''\ + + + + B + C +''') + + pi = tree.getroot().getprevious() + self.assertEquals("TEST", pi.get("href")) + + pi.set("href", "TEST123") + self.assertEquals("TEST123", pi.get("href")) + + def test_xslt_pi_set_new(self): + tree = self.parse('''\ + + + + B + C +''') + + pi = tree.getroot().getprevious() + self.assertEquals(None, pi.get("href")) + + pi.set("href", "TEST") + self.assertEquals("TEST", pi.get("href")) + def test_exslt_regexp_test(self): xslt = etree.XSLT(etree.XML("""\ ' in value: + raise ValueError, "Invalid URL, must not contain '\"' or '>'" + else: + attrib = ' href="%s"' % value + text = ' ' + self.text + if _FIND_PI_HREF(text): + self.text = _REPLACE_PI_HREF(attrib, text) + else: + self.text = text + attrib + + def get(self, key, default=None): + for attr, value in _FIND_PI_ATTRIBUTES(' ' + self.text): + if attr == key: + return value + return default ################################################################################ # EXSLT regexp implementation From scoder at codespeak.net Sat Feb 10 18:36:30 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sat, 10 Feb 2007 18:36:30 +0100 (CET) Subject: [Lxml-checkins] 
r38404 - lxml/trunk Message-ID: <20070210173630.E9823100A5@code0.codespeak.net> Author: scoder Date: Sat Feb 10 18:36:29 2007 New Revision: 38404 Modified: lxml/trunk/setup.py Log: it's mature, right? Modified: lxml/trunk/setup.py ============================================================================== --- lxml/trunk/setup.py (original) +++ lxml/trunk/setup.py Sat Feb 10 18:36:29 2007 @@ -37,8 +37,9 @@ description="Powerful and Pythonic XML processing library combining libxml2/libxslt with the ElementTree API.", long_description=(("""\ -lxml is a Pythonic binding for the libxml2 and libxslt libraries. It provides -safe and convenient access to these libraries using the ElementTree API. +lxml is a Pythonic, mature binding for the libxml2 and libxslt libraries. It +provides safe and convenient access to these libraries using the ElementTree +API. It extends the ElementTree API significantly to offer support for XPath, RelaxNG, XML Schema, XSLT, C14N and much more. From scoder at codespeak.net Sat Feb 10 22:26:01 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sat, 10 Feb 2007 22:26:01 +0100 (CET) Subject: [Lxml-checkins] r38417 - lxml/trunk/benchmark Message-ID: <20070210212601.DBBB710093@code0.codespeak.net> Author: scoder Date: Sat Feb 10 22:25:59 2007 New Revision: 38417 Modified: lxml/trunk/benchmark/benchbase.py Log: support regexps in benchmark selection Modified: lxml/trunk/benchmark/benchbase.py ============================================================================== --- lxml/trunk/benchmark/benchbase.py (original) +++ lxml/trunk/benchmark/benchbase.py Sat Feb 10 22:25:59 2007 @@ -1,4 +1,4 @@ -import sys, string, time, copy, gc +import sys, re, string, time, copy, gc from itertools import * from StringIO import StringIO import time @@ -305,10 +305,12 @@ # sorted by name and tree tuple benchmarks = [ sorted(b.benchmarks()) for b in benchmark_suites ] + selected = [ re.compile(r).search for r in selected ] + if selected: 
benchmarks = [ [ b for b in bs if [ match for match in selected - if match in b[0] ] ] + if match(b[0]) ] ] for bs in benchmarks ] return (benchmark_suites, benchmarks) From scoder at codespeak.net Sat Feb 10 22:26:40 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sat, 10 Feb 2007 22:26:40 +0100 (CET) Subject: [Lxml-checkins] r38418 - lxml/trunk Message-ID: <20070210212640.95DD610093@code0.codespeak.net> Author: scoder Date: Sat Feb 10 22:26:38 2007 New Revision: 38418 Modified: lxml/trunk/CHANGES.txt Log: support regexps in benchmark selection Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Sat Feb 10 22:26:38 2007 @@ -8,6 +8,8 @@ Features added -------------- +* Support for regular expressions in benchmark selection + * get/set emulation (not .attrib!) for attributes on processing instructions * ElementInclude Python module for ElementTree compatible XInclude processing From scoder at codespeak.net Sat Feb 10 22:35:17 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sat, 10 Feb 2007 22:35:17 +0100 (CET) Subject: [Lxml-checkins] r38419 - lxml/trunk/src/lxml Message-ID: <20070210213517.2E33510093@code0.codespeak.net> Author: scoder Date: Sat Feb 10 22:35:15 2007 New Revision: 38419 Added: lxml/trunk/src/lxml/config.pxd Modified: lxml/trunk/src/lxml/etree.pyx lxml/trunk/src/lxml/etree_defs.h lxml/trunk/src/lxml/parser.pxi lxml/trunk/src/lxml/python.pxd Log: ported parser locking to pythread.h (instead of Python thread module) Added: lxml/trunk/src/lxml/config.pxd ============================================================================== --- (empty file) +++ lxml/trunk/src/lxml/config.pxd Sat Feb 10 22:35:15 2007 @@ -0,0 +1,2 @@ +cdef extern from "etree_defs.h": + cdef int ENABLE_THREADING Modified: lxml/trunk/src/lxml/etree.pyx 
============================================================================== --- lxml/trunk/src/lxml/etree.pyx (original) +++ lxml/trunk/src/lxml/etree.pyx Sat Feb 10 22:35:15 2007 @@ -1,4 +1,4 @@ -cimport tree, python +cimport tree, python, config from tree cimport xmlDoc, xmlNode, xmlAttr, xmlNs, _isElement, _getNs from python cimport isinstance, issubclass, hasattr, getattr, callable from python cimport iter, repr, str, _cstr, _isString, Py_ssize_t @@ -38,12 +38,6 @@ cdef object re import re -cdef object thread -try: - import thread -except ImportError: - pass - cdef object ITER_EMPTY ITER_EMPTY = iter(()) Modified: lxml/trunk/src/lxml/etree_defs.h ============================================================================== --- lxml/trunk/src/lxml/etree_defs.h (original) +++ lxml/trunk/src/lxml/etree_defs.h Sat Feb 10 22:35:15 2007 @@ -28,6 +28,17 @@ #define PyEval_RestoreThread(state) #define PyGILState_Ensure() (PyGILState_UNLOCKED) #define PyGILState_Release(state) + + #define PyThread_allocate_lock() (NULL) + #define PyThread_free_lock(lock) + #define PyThread_acquire_lock(lock, mode) (1) + #define PyThread_release_lock(lock) +#endif + +#ifdef WITHOUT_THREADING + #define ENABLE_THREADING 0 +#else + #define ENABLE_THREADING 1 #endif /* libxml2 version specific setup */ Modified: lxml/trunk/src/lxml/parser.pxi ============================================================================== --- lxml/trunk/src/lxml/parser.pxi (original) +++ lxml/trunk/src/lxml/parser.pxi Sat Feb 10 22:35:15 2007 @@ -349,8 +349,7 @@ cdef LxmlParserType _parser_type cdef xmlParserCtxt* _parser_ctxt cdef ElementClassLookup _class_lookup - cdef object _lockParser - cdef object _unlockParser + cdef python.PyThread_type_lock _parser_lock def __init__(self, context_class=_ResolverContext): cdef xmlParserCtxt* pctxt @@ -371,14 +370,12 @@ if pctxt.sax != NULL: # hard switch-off for CDATA nodes => makes them plain text pctxt.sax.cdataBlock = NULL - if thread is None or self._parser_type 
== LXML_ITERPARSE_PARSER: + if not config.ENABLE_THREADING or \ + self._parser_type == LXML_ITERPARSE_PARSER: # no threading - self._lockParser = self.__dummy - self._unlockParser = self.__dummy + self._parser_lock = NULL else: - lock = thread.allocate_lock() - self._lockParser = lock.acquire - self._unlockParser = lock.release + self._parser_lock = python.PyThread_allocate_lock() self._error_log = _ErrorLog() self.resolvers = _ResolverRegistry() self._context = context_class(self.resolvers) @@ -387,6 +384,8 @@ def __dealloc__(self): if self._parser_ctxt is not NULL: xmlparser.xmlFreeParserCtxt(self._parser_ctxt) + if self._parser_lock is not NULL: + python.PyThread_free_lock(self._parser_lock) cdef void _cleanup(self): cdef xmlParserCtxt* pctxt @@ -395,6 +394,21 @@ if pctxt.spaceTab is not NULL: # work around bug in libxml2 xmlparser.xmlClearParserCtxt(pctxt) + cdef int _lockParser(self) except 1: + cdef python.PyThreadState* state + cdef int result + if config.ENABLE_THREADING and self._parser_lock != NULL: + state = python.PyEval_SaveThread() + result = python.PyThread_acquire_lock(self._parser_lock, python.WAIT_LOCK) + python.PyEval_RestoreThread(state) + if result == 0: + raise ParserError, "parser locking failed" + return 0 + + cdef void _unlockParser(self): + if config.ENABLE_THREADING and self._parser_lock != NULL: + python.PyThread_release_lock(self._parser_lock) + property error_log: def __get__(self): return self._error_log.copy() Modified: lxml/trunk/src/lxml/python.pxd ============================================================================== --- lxml/trunk/src/lxml/python.pxd (original) +++ lxml/trunk/src/lxml/python.pxd Sat Feb 10 22:35:15 2007 @@ -88,6 +88,18 @@ cdef void PyEval_RestoreThread(PyThreadState* state) cdef PyObject* PyThreadState_GetDict() +cdef extern from "pythread.h": + ctypedef void* PyThread_type_lock + cdef PyThread_type_lock PyThread_allocate_lock() + cdef void PyThread_free_lock(PyThread_type_lock lock) + cdef int 
PyThread_acquire_lock(PyThread_type_lock lock, int mode) + cdef void PyThread_release_lock(PyThread_type_lock lock) + cdef long PyThread_get_thread_ident() + + ctypedef enum __WaitLock: + WAIT_LOCK + NOWAIT_LOCK + cdef extern from "etree_defs.h": # redefines some functions as macros cdef int _isString(object obj) cdef int isinstance(object instance, object classes) From scoder at codespeak.net Mon Feb 12 10:41:08 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Mon, 12 Feb 2007 10:41:08 +0100 (CET) Subject: [Lxml-checkins] r38537 - lxml/trunk/src/lxml Message-ID: <20070212094108.9E405100A7@code0.codespeak.net> Author: scoder Date: Mon Feb 12 10:41:06 2007 New Revision: 38537 Modified: lxml/trunk/src/lxml/parser.pxi lxml/trunk/src/lxml/tree.pxd Log: use enum types instead of ints Modified: lxml/trunk/src/lxml/parser.pxi ============================================================================== --- lxml/trunk/src/lxml/parser.pxi (original) +++ lxml/trunk/src/lxml/parser.pxi Mon Feb 12 10:41:06 2007 @@ -160,7 +160,7 @@ cdef char* _findEncodingName(char* buffer, int size): "Work around bug in libxml2: find iconv name of encoding on our own." 
- cdef int enc + cdef tree.xmlCharEncoding enc enc = tree.xmlDetectCharEncoding(buffer, size) if enc == tree.XML_CHAR_ENCODING_UTF16LE: return "UTF16LE" Modified: lxml/trunk/src/lxml/tree.pxd ============================================================================== --- lxml/trunk/src/lxml/tree.pxd (original) +++ lxml/trunk/src/lxml/tree.pxd Mon Feb 12 10:41:06 2007 @@ -36,9 +36,9 @@ ctypedef struct xmlCharEncodingHandler cdef xmlCharEncodingHandler* xmlFindCharEncodingHandler(char* name) - cdef xmlCharEncodingHandler* xmlGetCharEncodingHandler(int enc) + cdef xmlCharEncodingHandler* xmlGetCharEncodingHandler(xmlCharEncoding enc) cdef int xmlCharEncCloseFunc(xmlCharEncodingHandler* handler) - cdef int xmlDetectCharEncoding(char* text, int len) + cdef xmlCharEncoding xmlDetectCharEncoding(char* text, int len) cdef char* xmlGetCharEncodingName(xmlCharEncoding enc) cdef extern from "libxml/hash.h": From scoder at codespeak.net Wed Feb 14 15:16:37 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Wed, 14 Feb 2007 15:16:37 +0100 (CET) Subject: [Lxml-checkins] r38833 - lxml/trunk Message-ID: <20070214141637.2D97610086@code0.codespeak.net> Author: scoder Date: Wed Feb 14 15:16:37 2007 New Revision: 38833 Modified: lxml/trunk/version.txt Log: version: 1.2 Modified: lxml/trunk/version.txt ============================================================================== --- lxml/trunk/version.txt (original) +++ lxml/trunk/version.txt Wed Feb 14 15:16:37 2007 @@ -1 +1 @@ -1.2dev +1.2 From scoder at codespeak.net Wed Feb 14 15:17:02 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Wed, 14 Feb 2007 15:17:02 +0100 (CET) Subject: [Lxml-checkins] r38834 - lxml/trunk Message-ID: <20070214141702.254DF10086@code0.codespeak.net> Author: scoder Date: Wed Feb 14 15:16:57 2007 New Revision: 38834 Modified: lxml/trunk/setupinfo.py Log: env_map removed (by Sidnei) Modified: lxml/trunk/setupinfo.py 
============================================================================== --- lxml/trunk/setupinfo.py (original) +++ lxml/trunk/setupinfo.py Wed Feb 14 15:16:57 2007 @@ -13,13 +13,8 @@ ] -env_map = {'win32':{'INCLUDE': 'INCLUDE', - 'LIBRARY': 'LIB', - 'CFLAGS' : 'CFLAGS'}, - }.get(sys.platform, {}) - def env_var(name): - value = os.getenv(env_map.get(name), '') + value = os.getenv(name, '') return value.split(os.pathsep) def ext_modules(static_include_dirs, static_library_dirs, static_cflags): From scoder at codespeak.net Wed Feb 14 15:17:43 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Wed, 14 Feb 2007 15:17:43 +0100 (CET) Subject: [Lxml-checkins] r38835 - lxml/trunk Message-ID: <20070214141743.3B2B210086@code0.codespeak.net> Author: scoder Date: Wed Feb 14 15:17:41 2007 New Revision: 38835 Modified: lxml/trunk/versioninfo.py Log: SVN version parser cleanups Modified: lxml/trunk/versioninfo.py ============================================================================== --- lxml/trunk/versioninfo.py (original) +++ lxml/trunk/versioninfo.py Wed Feb 14 15:17:41 2007 @@ -30,11 +30,13 @@ f.close() if data.startswith('8'): + # SVN >= 1.4 data = map(str.splitlines, data.split('\n\x0c\n')) del data[0][0] # get rid of the '8' dirurl = data[0][3] localrev = max([int(d[9]) for d in data if len(d)>9 and d[9]]) elif data.startswith(' Author: scoder Date: Wed Feb 14 15:19:02 2007 New Revision: 38836 Modified: lxml/trunk/CHANGES.txt lxml/trunk/src/lxml/apihelpers.pxi lxml/trunk/src/lxml/etree.pyx lxml/trunk/src/lxml/etreepublic.pxd lxml/trunk/src/lxml/extensions.pxi lxml/trunk/src/lxml/iterparse.pxi lxml/trunk/src/lxml/proxy.pxi lxml/trunk/src/lxml/public-api.pxi lxml/trunk/src/lxml/relaxng.pxi lxml/trunk/src/lxml/serializer.pxi lxml/trunk/src/lxml/xmlid.pxi lxml/trunk/src/lxml/xmlschema.pxi lxml/trunk/src/lxml/xpath.pxi lxml/trunk/src/lxml/xslt.pxi Log: removed now unneeded _NodeBase class, merged into _Element Modified: lxml/trunk/CHANGES.txt 
============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Wed Feb 14 15:19:02 2007 @@ -2,8 +2,8 @@ lxml changelog ============== -under development -================= +1.2 (2007-02-14) +================ Features added -------------- @@ -34,6 +34,13 @@ * Element.find*() did not accept QName objects as path +Other changes +------------- + +* code cleanup: redundant _NodeBase super class merged into _Element class + Note: although the impact should be zero in most cases, this change breaks + the compatibility of the public C-API + 1.1.2 (2006-10-30) ================== Modified: lxml/trunk/src/lxml/apihelpers.pxi ============================================================================== --- lxml/trunk/src/lxml/apihelpers.pxi (original) +++ lxml/trunk/src/lxml/apihelpers.pxi Wed Feb 14 15:19:02 2007 @@ -10,19 +10,19 @@ c_child = c_child.next cdef _Document _documentOrRaise(object input): - """Call this to get the document of a _Document, _ElementTree or _NodeBase + """Call this to get the document of a _Document, _ElementTree or _Element object, or to raise an exception if it can't be determined. Should be used in all API functions for consistency. """ cdef _Document doc - cdef _NodeBase element + cdef _Element element if isinstance(input, _ElementTree): element = (<_ElementTree>input)._context_node if element is not None: doc = element._doc - elif isinstance(input, _NodeBase): - doc = (<_NodeBase>input)._doc + elif isinstance(input, _Element): + doc = (<_Element>input)._doc elif isinstance(input, _Document): doc = <_Document>input else: @@ -32,17 +32,17 @@ else: return doc -cdef _NodeBase _rootNodeOrRaise(object input): +cdef _Element _rootNodeOrRaise(object input): """Call this to get the root node of a _Document, _ElementTree or - _NodeBase object, or to raise an exception if it can't be determined. + _Element object, or to raise an exception if it can't be determined. 
Should be used in all API functions for consistency. """ - cdef _NodeBase node + cdef _Element node if isinstance(input, _ElementTree): node = (<_ElementTree>input)._context_node - elif isinstance(input, _NodeBase): - node = <_NodeBase>input + elif isinstance(input, _Element): + node = <_Element>input elif isinstance(input, _Document): node = (<_Document>input).getroot() else: @@ -54,27 +54,27 @@ cdef _Document _documentOf(object input): # call this to get the document of a - # _Document, _ElementTree or _NodeBase object + # _Document, _ElementTree or _Element object # may return None! - cdef _NodeBase element + cdef _Element element if isinstance(input, _ElementTree): element = (<_ElementTree>input)._context_node if element is not None: return element._doc - elif isinstance(input, _NodeBase): - return (<_NodeBase>input)._doc + elif isinstance(input, _Element): + return (<_Element>input)._doc elif isinstance(input, _Document): return <_Document>input return None -cdef _NodeBase _rootNodeOf(object input): +cdef _Element _rootNodeOf(object input): # call this to get the root node of a - # _Document, _ElementTree or _NodeBase object + # _Document, _ElementTree or _Element object # may return None! 
if isinstance(input, _ElementTree): return (<_ElementTree>input)._context_node - elif isinstance(input, _NodeBase): - return <_NodeBase>input + elif isinstance(input, _Element): + return <_Element>input elif isinstance(input, _Document): return (<_Document>input).getroot() else: @@ -176,7 +176,7 @@ tree.xmlFree(c_result) return result -cdef object _getAttributeValue(_NodeBase element, key, default): +cdef object _getAttributeValue(_Element element, key, default): cdef char* c_result cdef char* c_tag ns, tag = _getNsTag(key) @@ -192,7 +192,7 @@ tree.xmlFree(c_result) return result -cdef int _setAttributeValue(_NodeBase element, key, value) except -1: +cdef int _setAttributeValue(_Element element, key, value) except -1: cdef xmlNs* c_ns cdef char* c_value cdef char* c_tag @@ -207,7 +207,7 @@ tree.xmlSetNsProp(element._c_node, c_ns, c_tag, c_value) return 0 -cdef int _delAttribute(_NodeBase element, key) except -1: +cdef int _delAttribute(_Element element, key) except -1: cdef xmlAttr* c_attr cdef char* c_href ns, tag = _getNsTag(key) Modified: lxml/trunk/src/lxml/etree.pyx ============================================================================== --- lxml/trunk/src/lxml/etree.pyx (original) +++ lxml/trunk/src/lxml/etree.pyx Wed Feb 14 15:19:02 2007 @@ -358,6 +358,7 @@ result._parser = parser return result + cdef class DocInfo: "Document information provided by parser and DTD." cdef readonly object root_name @@ -391,287 +392,33 @@ else: return "" -cdef public class _NodeBase [ type LxmlNodeBaseType, - object LxmlNodeBase ]: - """Base class to reference a document object and a libxml node. + +cdef public class _Element [ type LxmlElementType, object LxmlElement ]: + """Element class. References a document object and a libxml node. By pointing to a Document instance, a reference is kept to _Document as long as there is some pointer to a node in it. 
""" cdef _Document _doc cdef xmlNode* _c_node - - def __dealloc__(self): - #print "trying to free node:", self._c_node - #displayNode(self._c_node, 0) - if self._c_node is not NULL: - unregisterProxy(self) - attemptDeallocation(self._c_node) - -cdef public class _ElementTree [ type LxmlElementTreeType, - object LxmlElementTree ]: - cdef _Document _doc - cdef _NodeBase _context_node - - # Note that _doc is only used to store the original document if we do not - # have a _context_node. All methods should prefer self._context_node._doc - # to honour tree restructuring. _doc can happily be None! - - cdef _assertHasRoot(self): - """We have to take care here: the document may not have a root node! - This can happen if ElementTree() is called without any argument and - the caller 'forgets' to call parse() afterwards, so this is a bug in - the caller program. - """ - assert self._context_node is not None, \ - "ElementTree not initialized, missing root" - - def parse(self, source, _BaseParser parser=None): - """Updates self with the content of source and returns its root - """ - cdef _Document doc - doc = _parseDocument(source, parser) - self._context_node = doc.getroot() - if self._context_node is None: - self._doc = doc - else: - self._doc = None - return self._context_node - - def getroot(self): - """Gets the root element for this tree. - """ - return self._context_node - - def __copy__(self): - return ElementTree(self._context_node) - - def __deepcopy__(self, memo): - if self._context_node is None: - return ElementTree() - else: - return ElementTree( self._context_node.__copy__() ) - - property docinfo: - """Information about the document provided by parser and DTD. This - value is only defined for ElementTree objects based on the root node - of a parsed document (e.g. those returned by the parse functions). 
- """ - def __get__(self): - self._assertHasRoot() - return DocInfo(self._context_node._doc) - - property parser: - """The parser that was used to parse the document in this ElementTree. - """ - def __get__(self): - if self._context_node is not None and \ - self._context_node._doc is not None: - return self._context_node._doc._parser - return None - - def write(self, file, encoding=None, - pretty_print=False, xml_declaration=None): - """Write the tree to a file or file-like object. - - Defaults to ASCII encoding and writing a declaration as needed. - """ - cdef int c_write_declaration - self._assertHasRoot() - # suppress decl. in default case (purely for ElementTree compatibility) - if xml_declaration is not None: - c_write_declaration = bool(xml_declaration) - if encoding is None: - encoding = 'ASCII' - elif encoding is None: - encoding = 'ASCII' - c_write_declaration = 0 - else: - encoding = encoding.upper() - c_write_declaration = encoding not in \ - ('US-ASCII', 'ASCII', 'UTF8', 'UTF-8') - _tofilelike(file, self._context_node, encoding, - c_write_declaration, bool(pretty_print)) - - def getpath(self, _NodeBase element not None): - """Returns a structural, absolute XPath expression to find that element. - """ - cdef _Document doc - cdef xmlDoc* c_doc - cdef char* c_path - doc = self._context_node._doc - if element._doc is not doc: - raise ValueError, "Element is not in this tree." - c_doc = _fakeRootDoc(doc._c_doc, self._context_node._c_node) - c_path = tree.xmlGetNodePath(element._c_node) - _destroyFakeDoc(doc._c_doc, c_doc) - if c_path is NULL: - raise LxmlError, "Error creating node path." - path = c_path - tree.xmlFree(c_path) - return path - - def getiterator(self, tag=None): - """Creates an iterator for the root element. The iterator loops over all elements - in this tree, in document order. 
- """ - root = self.getroot() - if root is None: - return () - return root.getiterator(tag) - - def find(self, path): - """Finds the first toplevel element with given tag. Same as getroot().find(path). - """ - self._assertHasRoot() - root = self.getroot() - if path[:1] == "/": - path = "." + path - return root.find(path) - - def findtext(self, path, default=None): - """Finds the element text for the first toplevel element with given tag. Same as getroot().findtext(path) - """ - self._assertHasRoot() - root = self.getroot() - if path[:1] == "/": - path = "." + path - return root.findtext(path, default) - - def findall(self, path): - """Finds all toplevel elements with the given tag. Same as getroot().findall(path). - """ - self._assertHasRoot() - root = self.getroot() - if path[:1] == "/": - path = "." + path - return root.findall(path) - - # extensions to ElementTree API - def xpath(self, _path, namespaces=None, extensions=None, **_variables): - """XPath evaluate in context of document. - - ``namespaces`` is an optional dictionary with prefix to namespace URI - mappings, used by XPath. ``extensions`` defines additional extension - functions. - - Returns a list (nodeset), or bool, float or string. - - In case of a list result, return Element for element nodes, - string for text and attribute values. - - Note: if you are going to apply multiple XPath expressions - against the same document, it is more efficient to use - XPathEvaluator directly. - """ - self._assertHasRoot() - evaluator = XPathDocumentEvaluator(self, namespaces, extensions) - return evaluator.evaluate(_path, **_variables) - - def xslt(self, _xslt, extensions=None, access_control=None, **_kw): - """Transform this document using other document. - - xslt is a tree that should be XSLT - keyword parameters are XSLT transformation parameters. - - Returns the transformed tree. 
- - Note: if you are going to apply the same XSLT stylesheet against - multiple documents, it is more efficient to use the XSLT - class directly. - """ - self._assertHasRoot() - style = XSLT(_xslt, extensions=extensions, - access_control=access_control) - return style(self, **_kw) - - def relaxng(self, relaxng): - """Validate this document using other document. - - relaxng is a tree that should contain Relax NG XML - - Returns True or False, depending on whether validation - succeeded. - - Note: if you are going to apply the same Relax NG schema against - multiple documents, it is more efficient to use the RelaxNG - class directly. - """ - self._assertHasRoot() - schema = RelaxNG(relaxng) - return schema.validate(self) - - def xmlschema(self, xmlschema): - """Validate this document using other document. - - xmlschema is a tree that should contain XML Schema XML. - - Returns True or False, depending on whether validation - succeeded. - - Note: If you are going to apply the same XML Schema against - multiple documents, it is more efficient to use the XMLSchema - class directly. - """ - self._assertHasRoot() - schema = XMLSchema(xmlschema) - return schema.validate(self) - - def xinclude(self): - """Process the XInclude nodes in this document and include the - referenced XML fragments. - """ - cdef int result - # We cannot pass the XML_PARSE_NOXINCNODE option as this would free - # the XInclude nodes - there may still be Python references to them! - # Therefore, we allow XInclude nodes to be converted to - # XML_XINCLUDE_START nodes. XML_XINCLUDE_END nodes are added as - # siblings. Tree traversal will simply ignore them as they are not - # typed as elements. The included fragment is added between the two, - # i.e. as a sibling, which does not conflict with traversal. 
- self._assertHasRoot() - if self._context_node._doc._parser != None: - result = xinclude.xmlXIncludeProcessTreeFlags( - self._context_node._c_node, - self._context_node._doc._parser._parse_options) - else: - result = xinclude.xmlXIncludeProcessTree( - self._context_node._c_node) - if result == -1: - raise XIncludeError, "XInclude processing failed" - - def write_c14n(self, file): - """C14N write of document. Always writes UTF-8. - """ - self._assertHasRoot() - _tofilelikeC14N(file, self._context_node) - -cdef _ElementTree _elementTreeFactory(_Document doc, _NodeBase context_node): - return _newElementTree(doc, context_node, _ElementTree) - -cdef _ElementTree _newElementTree(_Document doc, _NodeBase context_node, - object baseclass): - cdef _ElementTree result - result = baseclass() - if context_node is None and doc is not None: - context_node = doc.getroot() - if context_node is None: - result._doc = doc - result._context_node = context_node - return result - -cdef public class _Element(_NodeBase) [ type LxmlElementType, - object LxmlElement ]: cdef object _tag cdef object _attrib + def _init(self): """Called after object initialisation. Custom subclasses may override this if they recursively call _init() in the superclasses. """ + def __dealloc__(self): + #print "trying to free node:", self._c_node + #displayNode(self._c_node, 0) + if self._c_node is not NULL: + unregisterProxy(self) + attemptDeallocation(self._c_node) + # MANIPULATORS - def __setitem__(self, Py_ssize_t index, _NodeBase element not None): + def __setitem__(self, Py_ssize_t index, _Element element not None): """Replaces the given subelement. 
""" cdef xmlNode* c_node @@ -709,7 +456,7 @@ """ cdef xmlNode* c_node cdef xmlNode* c_next - cdef _Element mynode + cdef _Element element # first, find start of slice if start == python.PY_SSIZE_T_MAX: c_node = NULL @@ -724,18 +471,18 @@ _appendChild(self, element) return # if the next element is in the list, insert before it - for mynode in value: - if mynode is None: + for element in value: + if element is None: raise TypeError, "Node must not be None." # store possible text tail - c_next = mynode._c_node.next + c_next = element._c_node.next # now move node previous to insertion point - tree.xmlUnlinkNode(mynode._c_node) - tree.xmlAddPrevSibling(c_node, mynode._c_node) + tree.xmlUnlinkNode(element._c_node) + tree.xmlAddPrevSibling(c_node, element._c_node) # and move tail just behind his node - _moveTail(c_next, mynode._c_node) + _moveTail(c_next, element._c_node) # move it into a new document - moveNodeToDocument(mynode, self._doc) + moveNodeToDocument(element, self._doc) def __deepcopy__(self, memo): return self.__copy__() @@ -987,9 +734,9 @@ def __contains__(self, element): cdef xmlNode* c_node - if not isinstance(element, _NodeBase): + if not isinstance(element, _Element): return 0 - c_node = (<_NodeBase>element)._c_node + c_node = (<_Element>element)._c_node return c_node is not NULL and c_node.parent is self._c_node def __iter__(self): @@ -1235,6 +982,7 @@ result._init() return result + cdef class __ContentOnlyElement(_Element): cdef int _raiseImmutable(self) except -1: raise TypeError, "this element does not have children or attributes" @@ -1321,9 +1069,263 @@ else: return "" % self.target + +cdef public class _ElementTree [ type LxmlElementTreeType, + object LxmlElementTree ]: + cdef _Document _doc + cdef _Element _context_node + + # Note that _doc is only used to store the original document if we do not + # have a _context_node. All methods should prefer self._context_node._doc + # to honour tree restructuring. _doc can happily be None! 
+ + cdef _assertHasRoot(self): + """We have to take care here: the document may not have a root node! + This can happen if ElementTree() is called without any argument and + the caller 'forgets' to call parse() afterwards, so this is a bug in + the caller program. + """ + assert self._context_node is not None, \ + "ElementTree not initialized, missing root" + + def parse(self, source, _BaseParser parser=None): + """Updates self with the content of source and returns its root + """ + cdef _Document doc + doc = _parseDocument(source, parser) + self._context_node = doc.getroot() + if self._context_node is None: + self._doc = doc + else: + self._doc = None + return self._context_node + + def getroot(self): + """Gets the root element for this tree. + """ + return self._context_node + + def __copy__(self): + return ElementTree(self._context_node) + + def __deepcopy__(self, memo): + if self._context_node is None: + return ElementTree() + else: + return ElementTree( self._context_node.__copy__() ) + + property docinfo: + """Information about the document provided by parser and DTD. This + value is only defined for ElementTree objects based on the root node + of a parsed document (e.g. those returned by the parse functions). + """ + def __get__(self): + self._assertHasRoot() + return DocInfo(self._context_node._doc) + + property parser: + """The parser that was used to parse the document in this ElementTree. + """ + def __get__(self): + if self._context_node is not None and \ + self._context_node._doc is not None: + return self._context_node._doc._parser + return None + + def write(self, file, encoding=None, + pretty_print=False, xml_declaration=None): + """Write the tree to a file or file-like object. + + Defaults to ASCII encoding and writing a declaration as needed. + """ + cdef int c_write_declaration + self._assertHasRoot() + # suppress decl. 
in default case (purely for ElementTree compatibility) + if xml_declaration is not None: + c_write_declaration = bool(xml_declaration) + if encoding is None: + encoding = 'ASCII' + elif encoding is None: + encoding = 'ASCII' + c_write_declaration = 0 + else: + encoding = encoding.upper() + c_write_declaration = encoding not in \ + ('US-ASCII', 'ASCII', 'UTF8', 'UTF-8') + _tofilelike(file, self._context_node, encoding, + c_write_declaration, bool(pretty_print)) + + def getpath(self, _Element element not None): + """Returns a structural, absolute XPath expression to find that element. + """ + cdef _Document doc + cdef xmlDoc* c_doc + cdef char* c_path + doc = self._context_node._doc + if element._doc is not doc: + raise ValueError, "Element is not in this tree." + c_doc = _fakeRootDoc(doc._c_doc, self._context_node._c_node) + c_path = tree.xmlGetNodePath(element._c_node) + _destroyFakeDoc(doc._c_doc, c_doc) + if c_path is NULL: + raise LxmlError, "Error creating node path." + path = c_path + tree.xmlFree(c_path) + return path + + def getiterator(self, tag=None): + """Creates an iterator for the root element. The iterator loops over all elements + in this tree, in document order. + """ + root = self.getroot() + if root is None: + return () + return root.getiterator(tag) + + def find(self, path): + """Finds the first toplevel element with given tag. Same as getroot().find(path). + """ + self._assertHasRoot() + root = self.getroot() + if path[:1] == "/": + path = "." + path + return root.find(path) + + def findtext(self, path, default=None): + """Finds the element text for the first toplevel element with given tag. Same as getroot().findtext(path) + """ + self._assertHasRoot() + root = self.getroot() + if path[:1] == "/": + path = "." + path + return root.findtext(path, default) + + def findall(self, path): + """Finds all toplevel elements with the given tag. Same as getroot().findall(path). 
+ """ + self._assertHasRoot() + root = self.getroot() + if path[:1] == "/": + path = "." + path + return root.findall(path) + + # extensions to ElementTree API + def xpath(self, _path, namespaces=None, extensions=None, **_variables): + """XPath evaluate in context of document. + + ``namespaces`` is an optional dictionary with prefix to namespace URI + mappings, used by XPath. ``extensions`` defines additional extension + functions. + + Returns a list (nodeset), or bool, float or string. + + In case of a list result, return Element for element nodes, + string for text and attribute values. + + Note: if you are going to apply multiple XPath expressions + against the same document, it is more efficient to use + XPathEvaluator directly. + """ + self._assertHasRoot() + evaluator = XPathDocumentEvaluator(self, namespaces, extensions) + return evaluator.evaluate(_path, **_variables) + + def xslt(self, _xslt, extensions=None, access_control=None, **_kw): + """Transform this document using other document. + + xslt is a tree that should be XSLT + keyword parameters are XSLT transformation parameters. + + Returns the transformed tree. + + Note: if you are going to apply the same XSLT stylesheet against + multiple documents, it is more efficient to use the XSLT + class directly. + """ + self._assertHasRoot() + style = XSLT(_xslt, extensions=extensions, + access_control=access_control) + return style(self, **_kw) + + def relaxng(self, relaxng): + """Validate this document using other document. + + relaxng is a tree that should contain Relax NG XML + + Returns True or False, depending on whether validation + succeeded. + + Note: if you are going to apply the same Relax NG schema against + multiple documents, it is more efficient to use the RelaxNG + class directly. + """ + self._assertHasRoot() + schema = RelaxNG(relaxng) + return schema.validate(self) + + def xmlschema(self, xmlschema): + """Validate this document using other document. 
+ + xmlschema is a tree that should contain XML Schema XML. + + Returns True or False, depending on whether validation + succeeded. + + Note: If you are going to apply the same XML Schema against + multiple documents, it is more efficient to use the XMLSchema + class directly. + """ + self._assertHasRoot() + schema = XMLSchema(xmlschema) + return schema.validate(self) + + def xinclude(self): + """Process the XInclude nodes in this document and include the + referenced XML fragments. + """ + cdef int result + # We cannot pass the XML_PARSE_NOXINCNODE option as this would free + # the XInclude nodes - there may still be Python references to them! + # Therefore, we allow XInclude nodes to be converted to + # XML_XINCLUDE_START nodes. XML_XINCLUDE_END nodes are added as + # siblings. Tree traversal will simply ignore them as they are not + # typed as elements. The included fragment is added between the two, + # i.e. as a sibling, which does not conflict with traversal. + self._assertHasRoot() + if self._context_node._doc._parser != None: + result = xinclude.xmlXIncludeProcessTreeFlags( + self._context_node._c_node, + self._context_node._doc._parser._parse_options) + else: + result = xinclude.xmlXIncludeProcessTree( + self._context_node._c_node) + if result == -1: + raise XIncludeError, "XInclude processing failed" + + def write_c14n(self, file): + """C14N write of document. Always writes UTF-8. 
+ """ + self._assertHasRoot() + _tofilelikeC14N(file, self._context_node) + +cdef _ElementTree _elementTreeFactory(_Document doc, _Element context_node): + return _newElementTree(doc, context_node, _ElementTree) + +cdef _ElementTree _newElementTree(_Document doc, _Element context_node, + object baseclass): + cdef _ElementTree result + result = baseclass() + if context_node is None and doc is not None: + context_node = doc.getroot() + if context_node is None: + result._doc = doc + result._context_node = context_node + return result + + cdef class _Attrib: - cdef _NodeBase _element - def __init__(self, _NodeBase element not None): + cdef _Element _element + def __init__(self, _Element element not None): self._element = element # MANIPULATORS @@ -1479,12 +1481,12 @@ cdef public class _ElementIterator(_ElementTagMatcher) [ object LxmlElementIterator, type LxmlElementIteratorType ]: # we keep Python references here to control GC - cdef _NodeBase _node + cdef _Element _node cdef _node_to_node_function _next_element def __iter__(self): return self - cdef void _storeNext(self, _NodeBase node): + cdef void _storeNext(self, _Element node): cdef xmlNode* c_node c_node = self._next_element(node._c_node) while c_node is not NULL and \ @@ -1498,7 +1500,7 @@ def __next__(self): cdef xmlNode* c_node - cdef _NodeBase current_node + cdef _Element current_node # Python ref: current_node = self._node if current_node is None: @@ -1508,7 +1510,7 @@ cdef class ElementChildIterator(_ElementIterator): "Iterates over the children of an element." - def __init__(self, _NodeBase node not None, reversed=False, tag=None): + def __init__(self, _Element node not None, reversed=False, tag=None): cdef xmlNode* c_node self._initTagMatch(tag) if reversed: @@ -1530,7 +1532,7 @@ You can pass the boolean keyword ``preceding`` to specify the direction. 
""" - def __init__(self, _NodeBase node not None, preceding=False, tag=None): + def __init__(self, _Element node not None, preceding=False, tag=None): self._initTagMatch(tag) if preceding: self._next_element = _previousElement @@ -1540,7 +1542,7 @@ cdef class AncestorsIterator(_ElementIterator): "Iterates over the ancestors of an element (from parent to parent)." - def __init__(self, _NodeBase node not None, tag=None): + def __init__(self, _Element node not None, tag=None): self._initTagMatch(tag) self._next_element = _parentElement self._storeNext(node) @@ -1560,9 +1562,9 @@ """ # we keep Python references here to control GC # keep next node to return and a depth counter in the tree - cdef _NodeBase _next_node - cdef _NodeBase _top_node - def __init__(self, _NodeBase node not None, tag=None, inclusive=True): + cdef _Element _next_node + cdef _Element _top_node + def __init__(self, _Element node not None, tag=None, inclusive=True): self._top_node = node self._next_node = node self._initTagMatch(tag) @@ -1577,7 +1579,7 @@ def __next__(self): cdef xmlNode* c_node - cdef _NodeBase current_node + cdef _Element current_node current_node = self._next_node if current_node is None: raise StopIteration @@ -1729,7 +1731,7 @@ """ return isinstance(element, _Element) -def dump(_NodeBase elem not None, pretty_print=True): +def dump(_Element elem not None, pretty_print=True): """Writes an element tree or element structure to sys.stdout. This function should be used for debugging only. 
""" @@ -1761,8 +1763,8 @@ else: write_declaration = bool(xml_declaration) - if isinstance(element_or_tree, _NodeBase): - return _tostring(<_NodeBase>element_or_tree, + if isinstance(element_or_tree, _Element): + return _tostring(<_Element>element_or_tree, encoding, write_declaration, c_pretty_print) elif isinstance(element_or_tree, _ElementTree): return _tostring((<_ElementTree>element_or_tree)._context_node, @@ -1782,8 +1784,8 @@ """ cdef int c_pretty_print c_pretty_print = bool(pretty_print) - if isinstance(element_or_tree, _NodeBase): - return _tounicode(<_NodeBase>element_or_tree, c_pretty_print) + if isinstance(element_or_tree, _Element): + return _tounicode(<_Element>element_or_tree, c_pretty_print) elif isinstance(element_or_tree, _ElementTree): return _tounicode((<_ElementTree>element_or_tree)._context_node, c_pretty_print) Modified: lxml/trunk/src/lxml/etreepublic.pxd ============================================================================== --- lxml/trunk/src/lxml/etreepublic.pxd (original) +++ lxml/trunk/src/lxml/etreepublic.pxd Wed Feb 14 15:19:02 2007 @@ -27,13 +27,10 @@ cdef class lxml.etree._Document [ object LxmlDocument ]: cdef tree.xmlDoc* _c_doc - cdef class lxml.etree._NodeBase [ object LxmlNodeBase ]: + cdef class lxml.etree._Element [ object LxmlElement ]: cdef _Document _doc cdef tree.xmlNode* _c_node - cdef class lxml.etree._Element(_NodeBase) [ object LxmlElement ]: - pass - cdef class lxml.etree.ElementBase(_Element) [ object LxmlElementBase ]: pass @@ -56,10 +53,10 @@ cdef _Element elementFactory(_Document doc, tree.xmlNode* c_node) # create an ElementTree for an Element - cdef _ElementTree elementTreeFactory(_NodeBase context_node) + cdef _ElementTree elementTreeFactory(_Element context_node) # create an ElementTree subclass for an Element - cdef _ElementTree newElementTree(_NodeBase context_node, object subclass) + cdef _ElementTree newElementTree(_Element context_node, object subclass) # create a new Element for an existing or new 
document (doc = None) # builds Python object after setting text, tail, namespaces and attributes @@ -101,7 +98,7 @@ char* c_ns, char* c_name) # return the value of attribute "{ns}name", or the default value - cdef object getAttributeValue(_NodeBase element, key, default) + cdef object getAttributeValue(_Element element, key, default) # return an iterator over attribute names (1), values (2) or items (3) # attributes must not be removed during iteration! @@ -109,11 +106,11 @@ # set an attribute value on an element # on failure, sets an exception and returns -1 - cdef int setAttributeValue(_NodeBase element, key, value) except -1 + cdef int setAttributeValue(_Element element, key, value) except -1 # delete an attribute # on failure, sets an exception and returns -1 - cdef int delAttribute(_NodeBase element, key) except -1 + cdef int delAttribute(_Element element, key) except -1 # delete an attribute based on name and namespace URI # returns -1 if the attribute was not found (no exception) @@ -153,12 +150,12 @@ cdef class lxml.etree._ElementIterator(_ElementTagMatcher) [ object LxmlElementIterator ]: - cdef _NodeBase _node + cdef _Element _node cdef tree.xmlNode* (*_next_element)(tree.xmlNode*) # store the initial node of the iterator if it matches the required tag # or its next matching sibling if not - cdef void iteratorStoreNext(_ElementIterator iterator, _NodeBase node) + cdef void iteratorStoreNext(_ElementIterator iterator, _Element node) ########################################################################## # other helper functions @@ -205,4 +202,4 @@ cdef _Document documentOrRaise(object input) # find the root Element of an Element (itself!), ElementTree or Document - cdef _NodeBase rootNodeOrRaise(object input) + cdef _Element rootNodeOrRaise(object input) Modified: lxml/trunk/src/lxml/extensions.pxi ============================================================================== --- lxml/trunk/src/lxml/extensions.pxi (original) +++ 
lxml/trunk/src/lxml/extensions.pxi Wed Feb 14 15:19:02 2007 @@ -196,16 +196,16 @@ functions would be reference counted too soon, during the XPath evaluation. This is most important in the case of exceptions. """ - cdef _NodeBase element - if isinstance(obj, _NodeBase): + cdef _Element element + if isinstance(obj, _Element): self._temp_refs.add(obj) - self._temp_refs.add((<_NodeBase>obj)._doc) + self._temp_refs.add((<_Element>obj)._doc) return elif _isString(obj) or not python.PySequence_Check(obj): return for o in obj: - if isinstance(o, _NodeBase): - element = <_NodeBase>o + if isinstance(o, _Element): + element = <_Element>o #print "Holding element:", element._c_node self._temp_refs.add(element) #print "Holding document:", element._doc._c_doc @@ -245,7 +245,7 @@ cdef xpath.xmlXPathObject* _wrapXPathObject(object obj) except NULL: cdef xpath.xmlNodeSet* resultSet - cdef _NodeBase node + cdef _Element node if python.PyUnicode_Check(obj): obj = _utf8(obj) if python.PyString_Check(obj): @@ -256,13 +256,13 @@ return xpath.xmlXPathNewFloat(obj) if obj is None: resultSet = xpath.xmlXPathNodeSetCreate(NULL) - elif isinstance(obj, _NodeBase): - resultSet = xpath.xmlXPathNodeSetCreate((<_NodeBase>obj)._c_node) + elif isinstance(obj, _Element): + resultSet = xpath.xmlXPathNodeSetCreate((<_Element>obj)._c_node) elif python.PySequence_Check(obj): resultSet = xpath.xmlXPathNodeSetCreate(NULL) for element in obj: - if isinstance(element, _NodeBase): - node = <_NodeBase>element + if isinstance(element, _Element): + node = <_Element>element xpath.xmlXPathNodeSetAdd(resultSet, node._c_node) else: xpath.xmlXPathFreeNodeSet(resultSet) @@ -356,7 +356,7 @@ cdef void _extension_function_call(_BaseContext context, function, xpath.xmlXPathParserContext* ctxt, int nargs): - cdef _NodeBase node + cdef _Element node cdef _Document doc cdef xpath.xmlXPathObject* obj cdef int i Modified: lxml/trunk/src/lxml/iterparse.pxi 
============================================================================== --- lxml/trunk/src/lxml/iterparse.pxi (original) +++ lxml/trunk/src/lxml/iterparse.pxi Wed Feb 14 15:19:02 2007 @@ -330,7 +330,7 @@ cdef char* _tag_name def __init__(self, element_or_tree, events=("end",), tag=None): - cdef _NodeBase root + cdef _Element root cdef int ns_count root = _rootNodeOrRaise(element_or_tree) self._event_filter = _buildIterparseEventFilter(events) @@ -369,8 +369,8 @@ return self def __next__(self): - cdef _NodeBase node - cdef _NodeBase next_node + cdef _Element node + cdef _Element next_node cdef int ns_count if python.PyList_GET_SIZE(self._events): return self._pop_event(0) @@ -406,7 +406,7 @@ return self._pop_event(0) raise StopIteration - cdef int _start_node(self, _NodeBase node): + cdef int _start_node(self, _Element node): cdef int ns_count if self._event_filter & ITERPARSE_FILTER_START_NS: ns_count = _appendStartNsEvents(node._c_node, self._events) @@ -420,8 +420,8 @@ python.PyList_Append(self._events, ("start", node)) return ns_count - cdef _NodeBase _end_node(self): - cdef _NodeBase node + cdef _Element _end_node(self): + cdef _Element node node, ns_count = self._pop_node() if self._event_filter & ITERPARSE_FILTER_END: if self._tag_tuple is None or \ Modified: lxml/trunk/src/lxml/proxy.pxi ============================================================================== --- lxml/trunk/src/lxml/proxy.pxi (original) +++ lxml/trunk/src/lxml/proxy.pxi Wed Feb 14 15:19:02 2007 @@ -4,19 +4,19 @@ # structure of the respective node to avoid multiple instantiation of # the Python class -cdef _NodeBase getProxy(xmlNode* c_node): +cdef _Element getProxy(xmlNode* c_node): """Get a proxy for a given node. 
""" #print "getProxy for:", c_node if c_node is not NULL and c_node._private is not NULL: - return <_NodeBase>c_node._private + return <_Element>c_node._private else: return None cdef int hasProxy(xmlNode* c_node): return c_node._private is not NULL -cdef registerProxy(_NodeBase proxy): +cdef registerProxy(_Element proxy): """Register a proxy and type for the node it's proxying for. """ cdef xmlNode* c_node @@ -28,7 +28,7 @@ assert c_node._private is NULL, "double registering proxy!" c_node._private = proxy -cdef unregisterProxy(_NodeBase proxy): +cdef unregisterProxy(_Element proxy): """Unregister a proxy for the node it's proxying for. """ cdef xmlNode* c_node @@ -154,14 +154,14 @@ c_node = c_doc.children tree.BEGIN_FOR_EACH_ELEMENT_FROM(c_doc, c_node, 1) if c_node._private is not NULL: - (<_NodeBase>c_node._private)._c_node = NULL + (<_Element>c_node._private)._c_node = NULL tree.END_FOR_EACH_ELEMENT_FROM(c_node) tree.xmlFreeDoc(c_doc) ################################################################################ # change _Document references when a node changes documents -cdef void moveNodeToDocument(_NodeBase node, _Document doc): +cdef void moveNodeToDocument(_Element node, _Document doc): """For a node and all nodes below, change document. 
A node can change document in certain operations as an XML @@ -185,5 +185,5 @@ c_node = c_parent.children tree.BEGIN_FOR_EACH_ELEMENT_FROM(c_parent, c_node, 1) if c_node._private is not NULL: - (<_NodeBase>c_node._private)._doc = doc + (<_Element>c_node._private)._doc = doc tree.END_FOR_EACH_ELEMENT_FROM(c_node) Modified: lxml/trunk/src/lxml/public-api.pxi ============================================================================== --- lxml/trunk/src/lxml/public-api.pxi (original) +++ lxml/trunk/src/lxml/public-api.pxi Wed Feb 14 15:19:02 2007 @@ -6,10 +6,10 @@ c_node = _copyNodeToDoc(c_root, doc._c_doc) return _elementFactory(doc, c_node) -cdef public _ElementTree elementTreeFactory(_NodeBase context_node): +cdef public _ElementTree elementTreeFactory(_Element context_node): return newElementTree(context_node, _ElementTree) -cdef public _ElementTree newElementTree(_NodeBase context_node, +cdef public _ElementTree newElementTree(_Element context_node, object subclass): if context_node is NULL or context_node is None: raise TypeError @@ -47,7 +47,7 @@ cdef public _Document documentOrRaise(object input): return _documentOrRaise(input) -cdef public _NodeBase rootNodeOrRaise(object input): +cdef public _Element rootNodeOrRaise(object input): return _rootNodeOrRaise(input) cdef public object textOf(xmlNode* c_node): @@ -77,16 +77,16 @@ char* ns, char* name): return _attributeValueFromNsName(c_element, ns, name) -cdef public object getAttributeValue(_NodeBase element, key, default): +cdef public object getAttributeValue(_Element element, key, default): return _getAttributeValue(element, key, default) cdef public object iterattributes(_Element element, int keysvalues): return _attributeIteratorFactory(element, keysvalues) -cdef public int setAttributeValue(_NodeBase element, key, value) except -1: +cdef public int setAttributeValue(_Element element, key, value) except -1: return _setAttributeValue(element, key, value) -cdef public int delAttribute(_NodeBase element, 
key) except -1: +cdef public int delAttribute(_Element element, key) except -1: return _delAttribute(element, key) cdef public int delAttributeFromNsName(tree.xmlNode* c_element, @@ -128,7 +128,7 @@ cdef public object namespacedNameFromNsName(char* href, char* name): return _namespacedNameFromNsName(href, name) -cdef public void iteratorStoreNext(_ElementIterator iterator, _NodeBase node): +cdef public void iteratorStoreNext(_ElementIterator iterator, _Element node): iterator._storeNext(node) cdef public void initTagMatch(_ElementTagMatcher matcher, tag): Modified: lxml/trunk/src/lxml/relaxng.pxi ============================================================================== --- lxml/trunk/src/lxml/relaxng.pxi (original) +++ lxml/trunk/src/lxml/relaxng.pxi Wed Feb 14 15:19:02 2007 @@ -20,7 +20,7 @@ cdef relaxng.xmlRelaxNG* _c_schema def __init__(self, etree=None, file=None): cdef _Document doc - cdef _NodeBase root_node + cdef _Element root_node cdef xmlNode* c_node cdef xmlDoc* fake_c_doc cdef char* c_href @@ -76,7 +76,7 @@ Returns true if document is valid, false if not.""" cdef python.PyThreadState* state cdef _Document doc - cdef _NodeBase root_node + cdef _Element root_node cdef xmlDoc* c_doc cdef relaxng.xmlRelaxNGValidCtxt* valid_ctxt cdef int ret Modified: lxml/trunk/src/lxml/serializer.pxi ============================================================================== --- lxml/trunk/src/lxml/serializer.pxi (original) +++ lxml/trunk/src/lxml/serializer.pxi Wed Feb 14 15:19:02 2007 @@ -1,6 +1,6 @@ # XML serialization and output functions -cdef _tostring(_NodeBase element, encoding, +cdef _tostring(_Element element, encoding, int write_xml_declaration, int pretty_print): "Serialize an element to an encoded string representation of its XML tree." 
cdef python.PyThreadState* state @@ -43,7 +43,7 @@ tree.xmlOutputBufferClose(c_buffer) return result -cdef _tounicode(_NodeBase element, int pretty_print): +cdef _tounicode(_Element element, int pretty_print): "Serialize an element to the Python unicode representation of its XML tree." cdef python.PyThreadState* state cdef tree.xmlOutputBuffer* c_buffer @@ -146,7 +146,7 @@ cdef int _closeFilelikeWriter(void* ctxt): return (<_FilelikeWriter>ctxt).close() -cdef _tofilelike(f, _NodeBase element, encoding, +cdef _tofilelike(f, _Element element, encoding, int write_xml_declaration, int pretty_print): cdef python.PyThreadState* state cdef _FilelikeWriter writer @@ -185,7 +185,7 @@ else: writer._exc_context._raise_if_stored() -cdef _tofilelikeC14N(f, _NodeBase element): +cdef _tofilelikeC14N(f, _Element element): cdef python.PyThreadState* state cdef _FilelikeWriter writer cdef tree.xmlOutputBuffer* c_buffer Modified: lxml/trunk/src/lxml/xmlid.pxi ============================================================================== --- lxml/trunk/src/lxml/xmlid.pxi (original) +++ lxml/trunk/src/lxml/xmlid.pxi Wed Feb 14 15:19:02 2007 @@ -22,7 +22,7 @@ Note that you must not modify the XML tree if you use the ID dictionary. The results are undefined. 
""" - cdef _NodeBase root + cdef _Element root root = XML(text) # xml:id spec compatible implementation: use DTD ID attributes from libxml2 if root._doc._c_doc.ids is NULL: Modified: lxml/trunk/src/lxml/xmlschema.pxi ============================================================================== --- lxml/trunk/src/lxml/xmlschema.pxi (original) +++ lxml/trunk/src/lxml/xmlschema.pxi Wed Feb 14 15:19:02 2007 @@ -19,7 +19,7 @@ cdef xmlschema.xmlSchema* _c_schema def __init__(self, etree=None, file=None): cdef _Document doc - cdef _NodeBase root_node + cdef _Element root_node cdef xmlDoc* fake_c_doc cdef xmlNode* c_node cdef char* c_href @@ -72,7 +72,7 @@ cdef python.PyThreadState* state cdef xmlschema.xmlSchemaValidCtxt* valid_ctxt cdef _Document doc - cdef _NodeBase root_node + cdef _Element root_node cdef xmlDoc* c_doc cdef int ret Modified: lxml/trunk/src/lxml/xpath.pxi ============================================================================== --- lxml/trunk/src/lxml/xpath.pxi (original) +++ lxml/trunk/src/lxml/xpath.pxi Wed Feb 14 15:19:02 2007 @@ -122,7 +122,7 @@ XPath evaluators must not be shared between threads. 
""" cdef _Element _element - def __init__(self, _NodeBase element not None, namespaces=None, extensions=None): + def __init__(self, _Element element not None, namespaces=None, extensions=None): cdef xpath.xmlXPathContext* xpathCtxt cdef int ns_register_status cdef _Document doc @@ -253,7 +253,7 @@ cdef xpath.xmlXPathContext* xpathCtxt cdef xpath.xmlXPathObject* xpathObj cdef _Document document - cdef _NodeBase element + cdef _Element element cdef _XPathContext context document = _documentOrRaise(_etree_or_element) Modified: lxml/trunk/src/lxml/xslt.pxi ============================================================================== --- lxml/trunk/src/lxml/xslt.pxi (original) +++ lxml/trunk/src/lxml/xslt.pxi Wed Feb 14 15:19:02 2007 @@ -279,7 +279,7 @@ cdef xmlDoc* c_doc cdef xmlDoc* fake_c_doc cdef _Document doc - cdef _NodeBase root_node + cdef _Element root_node doc = _documentOrRaise(xslt_input) root_node = _rootNodeOrRaise(xslt_input) @@ -341,7 +341,7 @@ cdef python.PyThreadState* state cdef _XSLTContext context cdef _Document input_doc - cdef _NodeBase root_node + cdef _Element root_node cdef _Document result_doc cdef _Document profile_doc cdef xmlDoc* c_profile_doc From scoder at codespeak.net Thu Feb 15 09:30:34 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Thu, 15 Feb 2007 09:30:34 +0100 (CET) Subject: [Lxml-checkins] r38871 - in lxml/trunk: . 
src/lxml src/lxml/tests Message-ID: <20070215083034.9848E10079@code0.codespeak.net> Author: scoder Date: Thu Feb 15 09:30:32 2007 New Revision: 38871 Modified: lxml/trunk/CHANGES.txt lxml/trunk/src/lxml/etree.pyx lxml/trunk/src/lxml/tests/test_elementtree.py Log: rich comparison of QName objects Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Thu Feb 15 09:30:32 2007 @@ -8,6 +8,8 @@ Features added -------------- +* Rich comparison of QName objects + * Support for regular expressions in benchmark selection * get/set emulation (not .attrib!) for attributes on processing instructions Modified: lxml/trunk/src/lxml/etree.pyx ============================================================================== --- lxml/trunk/src/lxml/etree.pyx (original) +++ lxml/trunk/src/lxml/etree.pyx Thu Feb 15 09:30:32 2007 @@ -210,6 +210,9 @@ return self.text def __hash__(self): return self.text.__hash__() + def __richcmp__(one, other, int op): + return python.PyObject_RichCompare( + str(one), str(other), op) # forward declaration of _BaseParser, see parser.pxi Modified: lxml/trunk/src/lxml/tests/test_elementtree.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_elementtree.py (original) +++ lxml/trunk/src/lxml/tests/test_elementtree.py Thu Feb 15 09:30:32 2007 @@ -2320,6 +2320,15 @@ self.assertEquals(a1.tag, "{myns}a") self.assertEquals(a2.tag, "{myns}a") + def test_qname_cmp(self): + etree = self.etree + qname1 = etree.QName('myns', 'a') + qname2 = etree.QName('myns', 'a') + self.assertEquals(qname1, "{myns}a") + self.assertEquals("{myns}a", qname2) + self.assertEquals(qname1, qname1) + self.assertEquals(qname1, qname2) + def _writeElement(self, element, encoding='us-ascii'): """Write out element for comparison. 
""" From scoder at codespeak.net Wed Feb 14 15:20:33 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Wed, 14 Feb 2007 15:20:33 +0100 (CET) Subject: [Lxml-checkins] r38837 - in lxml/pyrex: . Doc Pyrex/Compiler Pyrex/Mac Message-ID: <20070214142033.178E110089@code0.codespeak.net> Author: scoder Date: Wed Feb 14 15:20:32 2007 New Revision: 38837 Added: lxml/pyrex/Makefile lxml/pyrex/Pyrex/Compiler/ModuleNode.py Modified: lxml/pyrex/CHANGES.txt lxml/pyrex/Doc/extension_types.html lxml/pyrex/Doc/overview.html lxml/pyrex/Doc/sharing.html lxml/pyrex/Doc/special_methods.html lxml/pyrex/Pyrex/Compiler/Code.py lxml/pyrex/Pyrex/Compiler/ExprNodes.py lxml/pyrex/Pyrex/Compiler/Nodes.py lxml/pyrex/Pyrex/Compiler/Parsing.py lxml/pyrex/Pyrex/Compiler/PyrexTypes.py lxml/pyrex/Pyrex/Compiler/Symtab.py lxml/pyrex/Pyrex/Compiler/Version.py lxml/pyrex/Pyrex/Mac/DarwinSystem.py lxml/pyrex/ToDo.txt lxml/pyrex/setup.py Log: updated to 0.9.5.1a (+ enum work around) Modified: lxml/pyrex/CHANGES.txt ============================================================================== --- lxml/pyrex/CHANGES.txt (original) +++ lxml/pyrex/CHANGES.txt Wed Feb 14 15:20:32 2007 @@ -1,3 +1,139 @@ +0.9.5.1a +-------- + +Bug fixes: + + - Package list now calculated dynamically in setup.py so that + it will work with or without the testing framework installed. + + +0.9.5.1 +------- + +Bug fixes: + + - Comparing two values of the same enum type incorrectly + produced an error. [Anders Gustafsson ] + + - Compiler crash caused by assigning a Python value to + a variable of an enum type. [Peter Johnson ] + + - Comparison between pointer and array incorrectly produced + a type mismatch error. + [Helmut Jarausch ] + + - Unused local Python variable had spurious init/cleanup code + generated for it, causing C compilation errors. + [Helmut Jarausch ] + + - Updated list of packages in setup.py. 
+ +Modifications: + + - NULL in Pyrex source now translated into NULL instead of 0 + in C code, to allow for the possibility of calling something + not defined with a prototype in an external header. + [Adapted Cat ] + + +0.9.5 +----- + +Enhancements: + + - Exception return values may now be specified by arbitrary + constant expressions of appropriate type, not just literals. + [Stefan Behnel ] + + - Redundant type check now omitted when passing a literal None + to a function expecting an extension type. + [Patch by Sam Rushing ] + + - New-style classes now allowed as exceptions for compatibility + with Python 2.5 (inheritance from BaseException not currently + checked). + [Stefan Behnel ] + + - Sequence unpacking is now done using the iterator protocol + instead of indexing. + + - Allocation of an empty tuple is avoided when making a + Python call with no arguments. + [Stefan Behnel ] + + - Most warnings about unused variables and labels have been + eliminated. + + - Support for running the test suite on Linux added but not + yet fully tested. [Based in part on patch by Eric Wald + ]. + + - Makefile included for compiling the patched Carbon File module + used by the MacOSX test code. + +Modifications: + + - Type rules for enums tightened for compatibility with C++. + + - Direct assignment from float to int disallowed to prevent + C++ compilation warnings. + + - Hex literals left as hex in C code to avoid warnings from + the C compiler about decimal constants becoming unsigned. + +Bug fixes: + + - Exception raised during argument conversion could cause crash + due to uninitialised local variables. + [Konrad Hinsen ] + + - Assignment to a C attribute of an extension type from a + different type could generate C code with a pointer type + mismatch. [Atsuo Ishimoto ] + + - Backslash in a string literal before a non-special character + was not handled correctly. 
[Yuan Mang ] + + - Temporary vars used by del statement not being properly + released, sometimes leading to double decrefs. + [Jiba ] + + - A return statement whose expression raises an exception + inside a try-except that catches the exception could cause + a crash. [Anders Gustafsson ] + + - Fixed type compatibility checking problem between pointers + and arrays. [Lenard Lindstrom ] + + - Circular imports between modules defining extension types + caused unresolvable import order conflicts. + [Mike Wyatt ] + + - Cimporting multiple submodules from the same package caused + a redefined name error for the top level name. + [Martin Albrecht ] + + - Incorrect reference counting when assigning to an element of an + array that is a C attribute of an extension type. + [Igor Khavkine ] + + - Weak-referenceable extension types were not implemented + properly. [Chris Perkins , + Peter Johnson ] + + - Crash if C variable declared readonly outside an extension + type definition. [Eric Huss ] + +Doc updates: + + - Expanded discussion of the need for type declarations to enable + access to attributes of extension types. + + - Added a section "Source Files and Compilation" explaining the + rules for naming of source files of modules residing in packages, + and instructions for using the compiler and distutils extension. + + 0.9.4.1 ------- Modified: lxml/pyrex/Doc/extension_types.html ============================================================================== --- lxml/pyrex/Doc/extension_types.html (original) +++ lxml/pyrex/Doc/extension_types.html Wed Feb 14 15:20:32 2007 @@ -1,48 +1,148 @@ - - - Extension Types + + + + + + + + + + + + + + + Extension Types + + + -


Extension Types + + + +

+
Extension Types

-

Contents

-
    + + + +

    Contents

    + + + + + + + +

    Introduction

    + + As well as creating normal user-defined classes with the Python class statement, Pyrex also lets you create new built-in Python types, known as extension types. You define an extension type using the cdef class statement. Here's an example: -
    cdef class Shrubbery:

        cdef int width, height

    -

        def __init__(self, w, h):
    +

    cdef class Shrubbery: + +

        cdef int width, height

    + + + + +

        def __init__(self, w, h):
    + +         self.width = w
    + +         self.height = h

    -

        def describe(self):
    + + + + +

        def describe(self):
    + +         print "This shrubbery is", self.width, \
    + +             "by", self.height, "cubits."

    + +
    + + As you can see, a Pyrex extension type definition looks a lot like a Python class definition. Within it, you use the def statement to define methods that can be called from Python code. You can even define many of @@ -52,7 +152,12 @@ extension type), or they may be of any C data type. So you can use extension types to wrap arbitrary C data structures and provide a Python-like interface to them.

    -

    Attributes

    + + + +

    Attributes

    + + Attributes of an extension type are stored directly in the object's C struct. The set of attributes is fixed at compile time; you can't add attributes to an extension type instance at run time simply by assigning to them, as @@ -62,12 +167,24 @@ by Python attribute lookup, or by direct access to the C struct from Pyrex code. Python code is only able to access attributes of an extension type by the first method, but Pyrex code can use either method.

    -

    By default, extension type attributes are only accessible by direct access, + + + +

    By default, extension type attributes are only accessible by direct access, not Python access, which means that they are not accessible from Python code. To make them accessible from Python code, you need to declare them as public or readonly. For example,

    -
    cdef class Shrubbery:
    + + + +
    cdef class Shrubbery:
    + +     cdef public int width, height
    + +     cdef readonly float depth
    + + makes the width and height attributes readable and writable from Python code, and the depth attribute readable but not writable. @@ -76,21 +193,64 @@ although read-write exposure is only possible for generic Python attributes (of type object). If the attribute is declared to be of an extension type, it must be exposed readonly.

    -

    Note also that the public and readonly options apply + + + +

    Note also that the public and readonly options apply only to Python access, not direct access. All the attributes of an -extension type are always readable and writable by direct access.

    -

    Howerver, for direct access to be possible, the Pyrex compiler must know +extension type are always readable and writable by direct access.

    + +

    Type declarations

    + + + +

    Before you can directly access the attributes of an extension type, the Pyrex compiler must know that you have an instance of that type, and not just a generic Python object. It knows this already in the case of the "self" parameter of the methods of -that type, but in other cases you will have to tell it by means of a declaration. -For example,

    -
    cdef widen_shrubbery(Shrubbery sh, extra_width):
    +that type, but in other cases you will have to use a type declaration.

    + +

    For example, in the following function,

    + +
    cdef widen_shrubbery(sh, extra_width): # BAD
    + +     sh.width = sh.width + extra_width
    - If you attempt to access an extension type attribute through a generic -object reference, Pyrex will use a Python attribute lookup. If the attribute -is exposed for Python access (using public or readonly) -then this will work, but it will be much slower than direct access. + +

    because the sh parameter hasn't been given a type, the width +attribute will be accessed by a Python attribute lookup. If the +attribute has been declared public or readonly then this will work, but +it will be very inefficient. If the attribute is private, it will not work at all -- the +code will compile, but an attribute error will be raised at run time.

    + +

    The solution is to declare sh as being of type Shrubbery, as follows:

    + + + +
    cdef widen_shrubbery(Shrubbery sh, extra_width):
    + + +     sh.width = sh.width + extra_width
    + +Now the Pyrex compiler knows that sh has a C attribute called width and will generate code to access it directly and efficiently. The same consideration applies to local variables, for example,
    + +
    + +
    cdef Shrubbery another_shrubbery(Shrubbery sh1):
    + +    cdef Shrubbery sh2
    + +    sh2 = Shrubbery()
    + +    sh2.width = sh1.width
    + +    sh2.height = sh1.height
    + +    return sh2
    + +

    Extension types and None

    + + When you declare a parameter or C variable as being of an extension type, Pyrex will allow it to take on the value None as well as values of its declared type. This is analogous to the way a C pointer can take on the value NULL, @@ -103,223 +263,589 @@

    You need to be particularly careful when exposing Python functions which take extension types as arguments. If we wanted to make widen_shrubbery a Python function, for example, if we simply wrote

    -
    def widen_shrubbery(Shrubbery sh, extra_width): # This is
    + + + +
    def widen_shrubbery(Shrubbery sh, extra_width): # This is
    + +     sh.width = sh.width + extra_width           # dangerous!
    + + then users of our module could crash it by passing None for the sh parameter.

    One way to fix this would be

    -
    def widen_shrubbery(Shrubbery sh, extra_width):
    + + + +
    def widen_shrubbery(Shrubbery sh, extra_width):
    + +     if sh is None:
    + +         raise TypeError
    + +     sh.width = sh.width + extra_width
    + + but since this is anticipated to be such a frequent requirement, Pyrex provides a more convenient way. Parameters of a Python function declared as an extension type can have a not None clause:
    def widen_shrubbery(Shrubbery sh not None, extra_width):
    + +     sh.width = sh.width + extra_width
    + + Now the function will automatically check that sh is not None along with checking that it has the right type.

    Note, however, that the not None clause can only be used in Python functions (defined with def) and not C functions (defined with cdef). If you need to check whether a parameter to a C function is None, you will need to do it yourself.

    -

    Some more things to note:

    -
      + + + +

      Some more things to note:

      + + + +
        + +
      • The self parameter of a method of an extension type is guaranteed never to be None.
      • -
      -
        + + + +
      + + + +
        + +
      • When comparing a value with None, keep in mind that, if x is a Python object, x is None and x is not None are very efficient because they translate directly to C pointer comparisons, whereas x == None and x != None, or simply using x as a boolean value (as in if x: ...) will invoke Python operations and therefore be much slower.
      • -
      -

      Special methods

      + + + +
    + + + +

    Special methods

    + + Although the principles are similar, there are substantial differences between many of the __xxx__ special methods of extension types and their Python counterparts. There is a separate page devoted to this subject, and you should read it carefully before attempting to use any special methods in your extension types.

    Properties

    + + There is a special syntax for defining properties in an extension class: -
    cdef class Spam:

        property cheese:

    -

            "A doc string can go +

    cdef class Spam: + +

        property cheese:

    + + + + +

            "A doc string can go here."

    -

            def __get__(self): + + + + +

            def __get__(self):
    + +             # This is called when the property is read.
    + +             ...

    -

            def __set__(self, value): + + + + +

            def __set__(self, value):
    + +             # This is called when the property is written.
    + +             ...

    -

            def __del__(self): + + + + +

            def __del__(self):
    + +             # This is called when the property is deleted.
    + +  

    + +
    + + The __get__, __set__ and __del__ methods are all optional; if they are omitted, an exception will be raised when the corresponding operation is attempted.

    Here's a complete example. It defines a property which adds to a list each time it is written to, returns the list when it is read, and empties the list when it is deleted.
    + +  

    -
    + + + +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + -
    cheesy.pyxTest input
    cdef class CheeseShop: -

      cdef object cheeses

    -

      def __new__(self):
    + + +

      cdef object cheeses

    + + + + +

      def __new__(self):
    + +     self.cheeses = []

    -

      property cheese:

    -

        def __get__(self):
    + + + + +

      property cheese:

    + + + + +

        def __get__(self):
    + +       return "We don't have: %s" % self.cheeses

    -

        def __set__(self, value):
    + + + + +

        def __set__(self, value):
    + +       self.cheeses.append(value)

    -

        def __del__(self):
    + + + + +

        def __del__(self):
    + +       del self.cheeses[:]

    + +
    from cheesy import CheeseShop -

    shop = CheeseShop()
    + + +

    shop = CheeseShop()
    + + print shop.cheese

    -

    shop.cheese = "camembert"
    + + + + +

    shop.cheese = "camembert"
    + + print shop.cheese

    -

    shop.cheese = "cheddar"
    + + + + +

    shop.cheese = "cheddar"
    + + print shop.cheese

    -

    del shop.cheese
    + + + + +

    del shop.cheese
    + + print shop.cheese

    + +
    Test output
    We don't have: []
    + + We don't have: ['camembert']
    + + We don't have: ['camembert', 'cheddar']
    + + We don't have: []
    + + + + + + + +
    -

    Subclassing

    + + + +

    Subclassing

    + + An extension type may inherit from a built-in type or another extension type:
    cdef class Parrot:
    -     ...

    cdef class Norwegian(Parrot):
    + + +     ... + +

    cdef class Norwegian(Parrot):
    + +     ...

    + +
    -


    + + + +


    + + A complete definition of the base type must be available to Pyrex, so if the base type is a built-in type, it must have been previously declared as an extern extension type. If the base type is defined in another Pyrex module, it must either be declared as an extern extension type or imported using the cimport statement.

    -

    An extension type can only have one base class (no multiple inheritance). + + + +

    An extension type can only have one base class (no multiple inheritance).

    -

    Pyrex extension types can also be subclassed in Python. A Python class + + + +

    Pyrex extension types can also be subclassed in Python. A Python class can inherit from multiple extension types provided that the usual Python rules for multiple inheritance are followed (i.e. the C layouts of all the base classes must be compatible).
    + +

    -

    C methods

    + + + +

    C methods

    + + Extension types can have C methods as well as Python methods. Like C functions, C methods are declared using cdef instead of def. C methods are "virtual", and may be overridden in derived extension types.
    + +
    - + + + +
    + + + + + + + + + + + + + + + + + + -
    pets.pyx
    + +
    Output
    + +
    cdef class Parrot:
    + +
    + +   cdef void describe(self):
    + +     print "This parrot is resting."
    + +
    + + cdef class Norwegian(Parrot):
    + +
    + +   cdef void describe(self):
    + +     Parrot.describe(self)
    + +     print "Lovely plumage!"
    + +
    + +
    + + cdef Parrot p1, p2
    + + p1 = Parrot()
    + + p2 = Norwegian()
    + + print "p1:"
    + + p1.describe()
    + + print "p2:"
    + + p2.describe()

    + +
    p1:
    + + This parrot is resting.
    + + p2:
    + +
    This parrot is resting.
    + +
    Lovely plumage!
    + +
    + + + + + + + +
    + + The above example also illustrates that a C method can call an inherited C method using the usual Python technique, i.e.
    + +
    Parrot.describe(self)
    + +
    -

    Forward-declaring extension types

    + + + +

    Forward-declaring extension types

    + + Extension types can be forward-declared, like struct and union types. This will be necessary if you have two extension types that need to refer to each other, e.g. -
    cdef class Shrubbery # forward declaration

    cdef class Shrubber:
    +

    cdef class Shrubbery # forward declaration + +

    cdef class Shrubber:
    + +     cdef Shrubbery work_in_progress

    -

    cdef class Shrubbery:
    + + + + +

    cdef class Shrubbery:
    + +     cdef Shrubber creator

    + +
    + + If you are forward-declaring an extension type that has a base class, you must specify the base class in both the forward declaration and its subsequent definition, for example,
    + +
    cdef class A(B)
    + +
    + + ...
    + +
    + + cdef class A(B):
    + +     # attributes and methods

    + +
    -

    Making extension types weak-referenceable

    By + + + +

    Making extension types weak-referenceable

    + +By default, extension types do not support having weak references made to them. You can enable weak referencing by declaring a C attribute of type object called __weakref__. For example,
    + +
    + +
    cdef class ExplodingAnimal:
    + +     """This animal will self-destruct when it is
    + +        no longer strongly referenced."""
    + +    
    + +     cdef object __weakref__
    + +
    + +
    + +

    Public and external extension types

    + + Extension types can be declared extern or public. An extern extension type declaration makes an extension type defined in external C code available to a Pyrex module. A public extension type declaration makes an extension type defined in a Pyrex module available to external C code.

    External extension types

    + + An extern extension type allows you to gain access to the internals of Python objects defined in the Python core or in a non-Pyrex extension module. @@ -328,80 +854,191 @@ module. While you can still do that, Pyrex 0.8 and later provides a better mechanism for this. See Sharing C Declarations Between Pyrex Modules.
    + + Here is an example which will let you get at the C-level members of the built-in complex object. -
    cdef extern from "complexobject.h":

        struct Py_complex:
    +

    cdef extern from "complexobject.h": + +

        struct Py_complex:
    + +         double real
    + +         double imag

    -

        ctypedef class __builtin__.complex [object PyComplexObject]: + + + + +

        ctypedef class __builtin__.complex [object PyComplexObject]:
    + +         cdef Py_complex cval

    -

    # A function which uses the above type
    + + + + +

    # A function which uses the above type
    + + def spam(complex c):
    + +     print "Real:", c.cval.real
    + +     print "Imag:", c.cval.imag

    + +
    + + Some important things to note are:
      + +
    1. In this example, ctypedef class has been used. This is because, in the Python header files, the PyComplexObject struct is declared with
      + +
      + + + +
      ctypedef struct {
      + +     ...
      + + } PyComplexObject;
      + +
      + +
      -
    2. As well as the name of the extension type, the module in which + + +
    3. + +
    4. As well as the name of the extension type, the module in which its type object can be found is also specified. See the implicit importing section below. 
      + +
      + +
    5. -
    6. When declaring an external extension type, you don't declare + + +
    7. When declaring an external extension type, you don't declare any methods. Declaration of methods is not required in order to call them, because the calls are Python method calls. Also, as with structs and unions, if your extension class declaration is inside a cdef extern from block, you only need to declare those C members which you wish to access.
    8. -
    -

    Implicit importing

    -
    Backwards Incompatibility Note: + + + + + + + +

    Implicit importing

    + + + +
    Backwards Incompatibility Note: You will have to update any pre-0.8 Pyrex modules you have which use extern extension types. I apologise for this, but for complicated reasons it proved to be too difficult to continue supporting the old way of doing these while introducing the new features that I wanted.
    + + Pyrex 0.8 and later requires you to include a module name in an extern extension class declaration, for example,
    cdef extern class MyModule.Spam:
    + +     ...
    + + The type object will be implicitly imported from the specified module and bound to the corresponding name in this module. In other words, in this example an implicit
      -
      from MyModule import Spam
      -
    + + + + +
    from MyModule import Spam
    + + + + + + statement will be executed at module load time.

    The module name can be a dotted name to refer to a module inside a package hierarchy, for example,

    -
    cdef extern class My.Nested.Package.Spam:
    + + + +
    cdef extern class My.Nested.Package.Spam:
    + +     ...
    + + You can also specify an alternative name under which to import the type using an as clause, for example,
      + + cdef extern class My.Nested.Package.Spam as Yummy:
      -    ...
    + + +    ... + + + which corresponds to the implicit import statement
      -
      from My.Nested.Package import Spam as Yummy
      -
    -

    Type names vs. constructor names

    + + + + +
    from My.Nested.Package import Spam as Yummy
    + + + + + + + +

    Type names vs. constructor names

    + + Inside a Pyrex module, the name of an extension type serves two distinct purposes. When used in an expression, it refers to a module-level global variable holding the type's constructor (i.e. its type-object). However, it can also be used as a C type name to declare variables, arguments and return values of that type.

    When you declare

    -
    cdef extern class MyModule.Spam:
    + + + +
    cdef extern class MyModule.Spam:
    + +     ...
    + + the name Spam serves both these roles. There may be other names by which you can refer to the constructor, but only Spam can be used as a type name. For example, if you were to explicitly import MyModule, @@ -409,21 +1046,34 @@ wouldn't be able to use MyModule.Spam as a type name.

    When an as clause is used, the name specified in the as clause also takes over both roles. So if you declare

    -
    cdef extern class MyModule.Spam as Yummy:
    + + + +
    cdef extern class MyModule.Spam as Yummy:
    + +     ...
    + + then Yummy becomes both the type name and a name for the constructor. Again, there are other ways that you could get hold of the constructor, but only Yummy is usable as a type name.

    Public extension types

    + + An extension type can be declared public, in which case a .h file is generated containing declarations for its object struct and type object. By including the .h file in external C code that you write, that code can access the attributes of the extension type.

    Name specification clause

    + + The part of the class declaration in square brackets is a special feature only available for extern or public extension types. The full form of this clause is
    [object object_struct_name, type type_object_name ]
    + + where object_struct_name is the name to assume for the type's C struct, and type_object_name is the name to assume for the type's statically declared type object. (The object and type clauses can be written @@ -433,12 +1083,30 @@ generate code that is compatible with the declarations in the header file. Otherwise, for extern extension types, the object clause is optional.

    -

    For public extension types, the object and type clauses + + + +

    For public extension types, the object and type clauses are both required, because Pyrex must be able to generate code that is compatible with external C code.

    -

    -

    + + + +

    + + + +

    + + Back to the Language Overview
    + +  
    + +
    - \ No newline at end of file + + + + Modified: lxml/pyrex/Doc/overview.html ============================================================================== --- lxml/pyrex/Doc/overview.html (original) +++ lxml/pyrex/Doc/overview.html Wed Feb 14 15:20:32 2007 @@ -1,108 +1,216 @@ + + + - - Pyrex Language Overview - + + Pyrex Language Overview + + -


    Overview of the Pyrex Language 

    +

    +
    Overview of the Pyrex Language  +

    + This document informally describes the extensions to the Python language made by Pyrex. Some day there will be a reference manual covering everything - in more detail.
    + in more detail. 

    Contents

    -   -

    Contents

    + -


    Basics +


    +

    Source Files and Compilation
    +

    +

    +Pyrex source file names consist of the name of the module followed by a .pyx extension, for example a module called primes would have a source file named primes.pyx.
    + +
    - This section describes the basic features of the Pyrex language. The facilities +If your module is destined to live in a package, the source file name should include the full dotted name that the module will eventually have. For example, a module called primes that will be installed in a package called numbers should have a source file called numbers.primes.pyx. +This will ensure that the __name__ properties of the module and any +classes defined in it are set correctly. If you don't do this, you may +find that pickling doesn't work, among other problems. It also ensures +that the Pyrex compiler has the right idea about the layout of the +module namespace, which can be important when accessing extension types +defined in other modules.
    +
    +Once you have written your .pyx file, there are a couple of ways of +turning it into an extension module. One way is to compile it manually +with the Pyrex compiler, e.g.
    +
    +
    pyrexc primes.pyx
    +
    +
    +This will produce a file called primes.c, +which then needs to be compiled with the C compiler using whatever +options are appropriate on your platform for generating an extension +module. There's a Makefile in the Demos directory (called Makefile.nodistutils) that shows how to do this for Linux.
    +
    +The other, and probably better, way is to use the distutils extension provided with Pyrex. See the Setup.py +file in the Demos directory for an example of how to use it. This +method has the advantage of being cross-platform -- the same setup file +should work on any platform where distutils can compile an extension +module.
    +
    +
    +

    Language Basics +

    +This section describes the basic features of the Pyrex language. The facilities covered in this section allow you to create Python-callable functions that manipulate C data structures and convert between Python and C data types. - Later sections will cover facilities for wrapping external C code, creating new Python types and cooperation between Pyrex modules. + Later sections will cover facilities for wrapping external C code, creating new Python types and cooperation between Pyrex modules.
    +

    Python functions vs. C functions

    + There are two kinds of function definition in Pyrex:

    Python functions are defined using the def statement, as in Python. They take Python objects as parameters and return Python objects.

    +

    C functions are defined using the new cdef statement. They take either Python objects or C values as parameters, and can return either Python objects or C values.

    +

    Within a Pyrex module, Python functions and C functions can call each other freely, but only Python functions can be called from outside the module by interpreted Python code. So, any functions that you want to "export" from your Pyrex module must be declared as Python functions using def.

    +

    Parameters of either type of function can be declared to have C data types, using normal C declaration syntax. For example,

    + -
    def spam(int i, char *s):
        ...
    -
    cdef int eggs(unsigned long l, float f):
        ...
    +
    +
    def spam(int i, char *s):
        ...
    + + +
    cdef int eggs(unsigned long l, float f):
        ...
    +
    + When a parameter of a Python function is declared to have a C data type, it is passed in as a Python object and automatically converted to a C value, if possible. Automatic conversion is currently only possible for numeric @@ -111,17 +219,22 @@

    C functions, on the other hand, can have parameters of any type, since they're passed in directly using a normal C function call.

    +

    Python objects as parameters and return values

    + If no type is specified for a parameter or return value, it is assumed to be a Python object. (Note that this is different from the C convention, where it would default to int.) For example, the following defines a C function that takes two Python objects as parameters and returns a Python object: -
    cdef spamobjs(x, y):
        ...
    +
    +
    cdef spamobjs(x, y):
        ...
    +
    + Reference counting for these objects is performed automatically according to the standard Python/C API rules (i.e. borrowed references are taken as parameters and a new reference is returned). @@ -129,126 +242,217 @@ as a Python object. This can be useful if the name being declared would otherwise be taken as the name of a type, for example,

    + -
    cdef ftang(object int):
        ...
    +
    +
    cdef ftang(object int):
        ...
    +
    + declares a parameter called int which is a Python object. You can also use object as the explicit return type of a function, e.g. -
    cdef object ftang(object int):
        ...
    +
    +
    cdef object ftang(object int):
        ...
    +
    + In the interests of clarity, it is probably a good idea to always be explicit about object parameters in C functions.

    C variable and type definitions

    + The cdef statement is also used to declare C variables, either local or module-level: -
    cdef int i, j, k
    cdef float f, g[42], *h
    +
    +
    cdef int i, j, k
    cdef float f, g[42], *h
    +
    + and C struct, union or enum types: -
    cdef struct Grail:
        int age
        float volume
    -
    cdef union Food:
        char *spam
        float *eggs
    -
    cdef enum CheeseType:
        cheddar, edam, 
        camembert
    -
    cdef enum CheeseState:
        hard = 1
        soft = 2
        runny = 3
    +
    +
    cdef struct Grail:
        int age
        float volume
    + + +
    cdef union Food:
        char *spam
        float *eggs
    + + +
    cdef enum CheeseType:
        cheddar, edam, 
        camembert
    + + +
    cdef enum CheeseState:
        hard = 1
        soft = 2
        runny = 3
    +
    + There is currently no special syntax for defining a constant, but you can use an anonymous enum declaration for this purpose, for example,
    cdef enum:
    +     tons_of_spam = 3
    + Note that the words struct, union and enum are used only when defining a type, not when referring to it. For example, to declare a variable pointing to a Grail you would write -
    cdef Grail *gp
    +
    +
    cdef Grail *gp
    +
    + and not -
    cdef struct Grail *gp # WRONG
    +
    +
    cdef struct Grail *gp # WRONG
    +
    + There is also a ctypedef statement for giving names to types, e.g. -
    ctypedef unsigned long ULong
    -
    ctypedef int *IntPtr
    +
    +
    ctypedef unsigned long ULong
    + + +
    ctypedef int *IntPtr
    +
    +

    Automatic type conversions

    + In most situations, automatic conversions will be performed for the basic numeric and string types when a Python object is used in a context requiring a C value, or vice versa. The following table summarises the conversion possibilities.
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    C types
    +
    From Python types
    +
    To Python types
    +
    [unsigned] char
    + [unsigned] short
    + int, long
    int, long
    +
    int
    +
    unsigned int
    + unsigned long
    + [unsigned] long long
    +
    int, long
    +
    +
    long
    +
    +
    float, double, long double
    +
    int, long, float
    +
    float
    +
    char *
    +
    str
    +
    str
    +
    +
    +

    Caveats when using a Python string in a C context

    + You need to be careful when using a Python string in a context expecting a char *. In this situation, a pointer to the contents of the Python string is used, which is only valid as long as the Python string exists. So you @@ -257,12 +461,16 @@ the Python string will live long enough, you will need to copy the C string.
    +
    + Pyrex detects and prevents some mistakes of this kind. For instance, if you attempt something like
    +
    cdef char *s
    s = pystring1 + pystring2
    + then Pyrex will produce the error message "Obtaining char * from temporary Python value". The reason is that concatenating the two Python strings produces a new Python string object that is referenced only by a temporary internal @@ -270,28 +478,37 @@ the temporary variable will be decrefed and the Python string deallocated, leaving s dangling. Since this code could not possibly work, Pyrex refuses to compile it.
    +
    + The solution is to assign the result of the concatenation to a Python variable, and then obtain the char * from that, i.e.
    +
    cdef char *s
    p = pystring1 + pystring2
    s = p
    + It is then your responsibility to hold the reference p for as long as necessary.
    +
    + Keep in mind that the rules used to detect such errors are only heuristics. Sometimes Pyrex will complain unnecessarily, and sometimes it will fail to detect a problem that exists. Ultimately, you need to understand the issue and be careful what you do