From scoder at codespeak.net Sat Feb 10 18:36:08 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sat, 10 Feb 2007 18:36:08 +0100 (CET) Subject: [Lxml-checkins] r38403 - in lxml/trunk: . src/lxml src/lxml/tests Message-ID: <20070210173608.528F7100A5@code0.codespeak.net> Author: scoder Date: Sat Feb 10 18:36:06 2007 New Revision: 38403 Modified: lxml/trunk/CHANGES.txt lxml/trunk/src/lxml/tests/test_xslt.py lxml/trunk/src/lxml/xslt.pxi Log: get()/set() methods for PI elements Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Sat Feb 10 18:36:06 2007 @@ -8,8 +8,10 @@ Features added -------------- -* ElementInclude module for ElementTree compatible XInclude processing that - honours custom resolvers registered with the source document +* get/set emulation (not .attrib!) for attributes on processing instructions + +* ElementInclude Python module for ElementTree compatible XInclude processing + that honours custom resolvers registered with the source document * ElementTree.parser property holds the parser used to parse the document Modified: lxml/trunk/src/lxml/tests/test_xslt.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_xslt.py (original) +++ lxml/trunk/src/lxml/tests/test_xslt.py Sat Feb 10 18:36:06 2007 @@ -599,7 +599,7 @@ style_root = tree.getroot().getprevious().parseXSL().getroot() self.assertEquals("{http://www.w3.org/1999/XSL/Transform}stylesheet", style_root.tag) - + def test_xslt_pi_embedded_xmlid(self): # test xml:id dictionary lookup mechanism tree = self.parse('''\ @@ -628,7 +628,7 @@ B ''', st.tostring(res)) - + def test_xslt_pi_embedded_id(self): # test XPath lookup mechanism tree = self.parse('''\ @@ -663,6 +663,88 @@ ''', st.tostring(res)) + def test_xslt_pi_get(self): + tree = self.parse('''\ + + + + B + C +''') + + pi = 
tree.getroot().getprevious() + self.assertEquals("TEST", pi.get("href")) + + def test_xslt_pi_get_all(self): + tree = self.parse('''\ + + + + B + C +''') + + pi = tree.getroot().getprevious() + self.assertEquals("TEST", pi.get("href")) + self.assertEquals("text/xsl", pi.get("type")) + self.assertEquals(None, pi.get("motz")) + + def test_xslt_pi_get_all_reversed(self): + tree = self.parse('''\ + + + + B + C +''') + + pi = tree.getroot().getprevious() + self.assertEquals("TEST", pi.get("href")) + self.assertEquals("text/xsl", pi.get("type")) + self.assertEquals(None, pi.get("motz")) + + def test_xslt_pi_get_unknown(self): + tree = self.parse('''\ + + + + B + C +''') + + pi = tree.getroot().getprevious() + self.assertEquals(None, pi.get("unknownattribute")) + + def test_xslt_pi_set_replace(self): + tree = self.parse('''\ + + + + B + C +''') + + pi = tree.getroot().getprevious() + self.assertEquals("TEST", pi.get("href")) + + pi.set("href", "TEST123") + self.assertEquals("TEST123", pi.get("href")) + + def test_xslt_pi_set_new(self): + tree = self.parse('''\ + + + + B + C +''') + + pi = tree.getroot().getprevious() + self.assertEquals(None, pi.get("href")) + + pi.set("href", "TEST") + self.assertEquals("TEST", pi.get("href")) + def test_exslt_regexp_test(self): xslt = etree.XSLT(etree.XML("""\ ' in value: + raise ValueError, "Invalid URL, must not contain '\"' or '>'" + else: + attrib = ' href="%s"' % value + text = ' ' + self.text + if _FIND_PI_HREF(text): + self.text = _REPLACE_PI_HREF(attrib, text) + else: + self.text = text + attrib + + def get(self, key, default=None): + for attr, value in _FIND_PI_ATTRIBUTES(' ' + self.text): + if attr == key: + return value + return default ################################################################################ # EXSLT regexp implementation From scoder at codespeak.net Sat Feb 10 18:36:30 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sat, 10 Feb 2007 18:36:30 +0100 (CET) Subject: [Lxml-checkins] 
r38404 - lxml/trunk Message-ID: <20070210173630.E9823100A5@code0.codespeak.net> Author: scoder Date: Sat Feb 10 18:36:29 2007 New Revision: 38404 Modified: lxml/trunk/setup.py Log: it's mature, right? Modified: lxml/trunk/setup.py ============================================================================== --- lxml/trunk/setup.py (original) +++ lxml/trunk/setup.py Sat Feb 10 18:36:29 2007 @@ -37,8 +37,9 @@ description="Powerful and Pythonic XML processing library combining libxml2/libxslt with the ElementTree API.", long_description=(("""\ -lxml is a Pythonic binding for the libxml2 and libxslt libraries. It provides -safe and convenient access to these libraries using the ElementTree API. +lxml is a Pythonic, mature binding for the libxml2 and libxslt libraries. It +provides safe and convenient access to these libraries using the ElementTree +API. It extends the ElementTree API significantly to offer support for XPath, RelaxNG, XML Schema, XSLT, C14N and much more. From scoder at codespeak.net Sat Feb 10 22:26:01 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sat, 10 Feb 2007 22:26:01 +0100 (CET) Subject: [Lxml-checkins] r38417 - lxml/trunk/benchmark Message-ID: <20070210212601.DBBB710093@code0.codespeak.net> Author: scoder Date: Sat Feb 10 22:25:59 2007 New Revision: 38417 Modified: lxml/trunk/benchmark/benchbase.py Log: support regexps in benchmark selection Modified: lxml/trunk/benchmark/benchbase.py ============================================================================== --- lxml/trunk/benchmark/benchbase.py (original) +++ lxml/trunk/benchmark/benchbase.py Sat Feb 10 22:25:59 2007 @@ -1,4 +1,4 @@ -import sys, string, time, copy, gc +import sys, re, string, time, copy, gc from itertools import * from StringIO import StringIO import time @@ -305,10 +305,12 @@ # sorted by name and tree tuple benchmarks = [ sorted(b.benchmarks()) for b in benchmark_suites ] + selected = [ re.compile(r).search for r in selected ] + if selected: 
benchmarks = [ [ b for b in bs if [ match for match in selected - if match in b[0] ] ] + if match(b[0]) ] ] for bs in benchmarks ] return (benchmark_suites, benchmarks) From scoder at codespeak.net Sat Feb 10 22:26:40 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sat, 10 Feb 2007 22:26:40 +0100 (CET) Subject: [Lxml-checkins] r38418 - lxml/trunk Message-ID: <20070210212640.95DD610093@code0.codespeak.net> Author: scoder Date: Sat Feb 10 22:26:38 2007 New Revision: 38418 Modified: lxml/trunk/CHANGES.txt Log: support regexps in benchmark selection Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Sat Feb 10 22:26:38 2007 @@ -8,6 +8,8 @@ Features added -------------- +* Support for regular expressions in benchmark selection + * get/set emulation (not .attrib!) for attributes on processing instructions * ElementInclude Python module for ElementTree compatible XInclude processing From scoder at codespeak.net Sat Feb 10 22:35:17 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sat, 10 Feb 2007 22:35:17 +0100 (CET) Subject: [Lxml-checkins] r38419 - lxml/trunk/src/lxml Message-ID: <20070210213517.2E33510093@code0.codespeak.net> Author: scoder Date: Sat Feb 10 22:35:15 2007 New Revision: 38419 Added: lxml/trunk/src/lxml/config.pxd Modified: lxml/trunk/src/lxml/etree.pyx lxml/trunk/src/lxml/etree_defs.h lxml/trunk/src/lxml/parser.pxi lxml/trunk/src/lxml/python.pxd Log: ported parser locking to pythread.h (instead of Python thread module) Added: lxml/trunk/src/lxml/config.pxd ============================================================================== --- (empty file) +++ lxml/trunk/src/lxml/config.pxd Sat Feb 10 22:35:15 2007 @@ -0,0 +1,2 @@ +cdef extern from "etree_defs.h": + cdef int ENABLE_THREADING Modified: lxml/trunk/src/lxml/etree.pyx 
============================================================================== --- lxml/trunk/src/lxml/etree.pyx (original) +++ lxml/trunk/src/lxml/etree.pyx Sat Feb 10 22:35:15 2007 @@ -1,4 +1,4 @@ -cimport tree, python +cimport tree, python, config from tree cimport xmlDoc, xmlNode, xmlAttr, xmlNs, _isElement, _getNs from python cimport isinstance, issubclass, hasattr, getattr, callable from python cimport iter, repr, str, _cstr, _isString, Py_ssize_t @@ -38,12 +38,6 @@ cdef object re import re -cdef object thread -try: - import thread -except ImportError: - pass - cdef object ITER_EMPTY ITER_EMPTY = iter(()) Modified: lxml/trunk/src/lxml/etree_defs.h ============================================================================== --- lxml/trunk/src/lxml/etree_defs.h (original) +++ lxml/trunk/src/lxml/etree_defs.h Sat Feb 10 22:35:15 2007 @@ -28,6 +28,17 @@ #define PyEval_RestoreThread(state) #define PyGILState_Ensure() (PyGILState_UNLOCKED) #define PyGILState_Release(state) + + #define PyThread_allocate_lock() (NULL) + #define PyThread_free_lock(lock) + #define PyThread_acquire_lock(lock, mode) (1) + #define PyThread_release_lock(lock) +#endif + +#ifdef WITHOUT_THREADING + #define ENABLE_THREADING 0 +#else + #define ENABLE_THREADING 1 #endif /* libxml2 version specific setup */ Modified: lxml/trunk/src/lxml/parser.pxi ============================================================================== --- lxml/trunk/src/lxml/parser.pxi (original) +++ lxml/trunk/src/lxml/parser.pxi Sat Feb 10 22:35:15 2007 @@ -349,8 +349,7 @@ cdef LxmlParserType _parser_type cdef xmlParserCtxt* _parser_ctxt cdef ElementClassLookup _class_lookup - cdef object _lockParser - cdef object _unlockParser + cdef python.PyThread_type_lock _parser_lock def __init__(self, context_class=_ResolverContext): cdef xmlParserCtxt* pctxt @@ -371,14 +370,12 @@ if pctxt.sax != NULL: # hard switch-off for CDATA nodes => makes them plain text pctxt.sax.cdataBlock = NULL - if thread is None or self._parser_type 
== LXML_ITERPARSE_PARSER: + if not config.ENABLE_THREADING or \ + self._parser_type == LXML_ITERPARSE_PARSER: # no threading - self._lockParser = self.__dummy - self._unlockParser = self.__dummy + self._parser_lock = NULL else: - lock = thread.allocate_lock() - self._lockParser = lock.acquire - self._unlockParser = lock.release + self._parser_lock = python.PyThread_allocate_lock() self._error_log = _ErrorLog() self.resolvers = _ResolverRegistry() self._context = context_class(self.resolvers) @@ -387,6 +384,8 @@ def __dealloc__(self): if self._parser_ctxt is not NULL: xmlparser.xmlFreeParserCtxt(self._parser_ctxt) + if self._parser_lock is not NULL: + python.PyThread_free_lock(self._parser_lock) cdef void _cleanup(self): cdef xmlParserCtxt* pctxt @@ -395,6 +394,21 @@ if pctxt.spaceTab is not NULL: # work around bug in libxml2 xmlparser.xmlClearParserCtxt(pctxt) + cdef int _lockParser(self) except 1: + cdef python.PyThreadState* state + cdef int result + if config.ENABLE_THREADING and self._parser_lock != NULL: + state = python.PyEval_SaveThread() + result = python.PyThread_acquire_lock(self._parser_lock, python.WAIT_LOCK) + python.PyEval_RestoreThread(state) + if result == 0: + raise ParserError, "parser locking failed" + return 0 + + cdef void _unlockParser(self): + if config.ENABLE_THREADING and self._parser_lock != NULL: + python.PyThread_release_lock(self._parser_lock) + property error_log: def __get__(self): return self._error_log.copy() Modified: lxml/trunk/src/lxml/python.pxd ============================================================================== --- lxml/trunk/src/lxml/python.pxd (original) +++ lxml/trunk/src/lxml/python.pxd Sat Feb 10 22:35:15 2007 @@ -88,6 +88,18 @@ cdef void PyEval_RestoreThread(PyThreadState* state) cdef PyObject* PyThreadState_GetDict() +cdef extern from "pythread.h": + ctypedef void* PyThread_type_lock + cdef PyThread_type_lock PyThread_allocate_lock() + cdef void PyThread_free_lock(PyThread_type_lock lock) + cdef int 
PyThread_acquire_lock(PyThread_type_lock lock, int mode) + cdef void PyThread_release_lock(PyThread_type_lock lock) + cdef long PyThread_get_thread_ident() + + ctypedef enum __WaitLock: + WAIT_LOCK + NOWAIT_LOCK + cdef extern from "etree_defs.h": # redefines some functions as macros cdef int _isString(object obj) cdef int isinstance(object instance, object classes) From scoder at codespeak.net Mon Feb 12 10:41:08 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Mon, 12 Feb 2007 10:41:08 +0100 (CET) Subject: [Lxml-checkins] r38537 - lxml/trunk/src/lxml Message-ID: <20070212094108.9E405100A7@code0.codespeak.net> Author: scoder Date: Mon Feb 12 10:41:06 2007 New Revision: 38537 Modified: lxml/trunk/src/lxml/parser.pxi lxml/trunk/src/lxml/tree.pxd Log: use enum types instead of ints Modified: lxml/trunk/src/lxml/parser.pxi ============================================================================== --- lxml/trunk/src/lxml/parser.pxi (original) +++ lxml/trunk/src/lxml/parser.pxi Mon Feb 12 10:41:06 2007 @@ -160,7 +160,7 @@ cdef char* _findEncodingName(char* buffer, int size): "Work around bug in libxml2: find iconv name of encoding on our own." 
- cdef int enc + cdef tree.xmlCharEncoding enc enc = tree.xmlDetectCharEncoding(buffer, size) if enc == tree.XML_CHAR_ENCODING_UTF16LE: return "UTF16LE" Modified: lxml/trunk/src/lxml/tree.pxd ============================================================================== --- lxml/trunk/src/lxml/tree.pxd (original) +++ lxml/trunk/src/lxml/tree.pxd Mon Feb 12 10:41:06 2007 @@ -36,9 +36,9 @@ ctypedef struct xmlCharEncodingHandler cdef xmlCharEncodingHandler* xmlFindCharEncodingHandler(char* name) - cdef xmlCharEncodingHandler* xmlGetCharEncodingHandler(int enc) + cdef xmlCharEncodingHandler* xmlGetCharEncodingHandler(xmlCharEncoding enc) cdef int xmlCharEncCloseFunc(xmlCharEncodingHandler* handler) - cdef int xmlDetectCharEncoding(char* text, int len) + cdef xmlCharEncoding xmlDetectCharEncoding(char* text, int len) cdef char* xmlGetCharEncodingName(xmlCharEncoding enc) cdef extern from "libxml/hash.h": From scoder at codespeak.net Wed Feb 14 15:16:37 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Wed, 14 Feb 2007 15:16:37 +0100 (CET) Subject: [Lxml-checkins] r38833 - lxml/trunk Message-ID: <20070214141637.2D97610086@code0.codespeak.net> Author: scoder Date: Wed Feb 14 15:16:37 2007 New Revision: 38833 Modified: lxml/trunk/version.txt Log: version: 1.2 Modified: lxml/trunk/version.txt ============================================================================== --- lxml/trunk/version.txt (original) +++ lxml/trunk/version.txt Wed Feb 14 15:16:37 2007 @@ -1 +1 @@ -1.2dev +1.2 From scoder at codespeak.net Wed Feb 14 15:17:02 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Wed, 14 Feb 2007 15:17:02 +0100 (CET) Subject: [Lxml-checkins] r38834 - lxml/trunk Message-ID: <20070214141702.254DF10086@code0.codespeak.net> Author: scoder Date: Wed Feb 14 15:16:57 2007 New Revision: 38834 Modified: lxml/trunk/setupinfo.py Log: env_map removed (by Sidnei) Modified: lxml/trunk/setupinfo.py 
============================================================================== --- lxml/trunk/setupinfo.py (original) +++ lxml/trunk/setupinfo.py Wed Feb 14 15:16:57 2007 @@ -13,13 +13,8 @@ ] -env_map = {'win32':{'INCLUDE': 'INCLUDE', - 'LIBRARY': 'LIB', - 'CFLAGS' : 'CFLAGS'}, - }.get(sys.platform, {}) - def env_var(name): - value = os.getenv(env_map.get(name), '') + value = os.getenv(name, '') return value.split(os.pathsep) def ext_modules(static_include_dirs, static_library_dirs, static_cflags): From scoder at codespeak.net Wed Feb 14 15:17:43 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Wed, 14 Feb 2007 15:17:43 +0100 (CET) Subject: [Lxml-checkins] r38835 - lxml/trunk Message-ID: <20070214141743.3B2B210086@code0.codespeak.net> Author: scoder Date: Wed Feb 14 15:17:41 2007 New Revision: 38835 Modified: lxml/trunk/versioninfo.py Log: SVN version parser cleanups Modified: lxml/trunk/versioninfo.py ============================================================================== --- lxml/trunk/versioninfo.py (original) +++ lxml/trunk/versioninfo.py Wed Feb 14 15:17:41 2007 @@ -30,11 +30,13 @@ f.close() if data.startswith('8'): + # SVN >= 1.4 data = map(str.splitlines, data.split('\n\x0c\n')) del data[0][0] # get rid of the '8' dirurl = data[0][3] localrev = max([int(d[9]) for d in data if len(d)>9 and d[9]]) elif data.startswith(' Author: scoder Date: Wed Feb 14 15:19:02 2007 New Revision: 38836 Modified: lxml/trunk/CHANGES.txt lxml/trunk/src/lxml/apihelpers.pxi lxml/trunk/src/lxml/etree.pyx lxml/trunk/src/lxml/etreepublic.pxd lxml/trunk/src/lxml/extensions.pxi lxml/trunk/src/lxml/iterparse.pxi lxml/trunk/src/lxml/proxy.pxi lxml/trunk/src/lxml/public-api.pxi lxml/trunk/src/lxml/relaxng.pxi lxml/trunk/src/lxml/serializer.pxi lxml/trunk/src/lxml/xmlid.pxi lxml/trunk/src/lxml/xmlschema.pxi lxml/trunk/src/lxml/xpath.pxi lxml/trunk/src/lxml/xslt.pxi Log: removed now unneeded _NodeBase class, merged into _Element Modified: lxml/trunk/CHANGES.txt 
============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Wed Feb 14 15:19:02 2007 @@ -2,8 +2,8 @@ lxml changelog ============== -under development -================= +1.2 (2007-02-14) +================ Features added -------------- @@ -34,6 +34,13 @@ * Element.find*() did not accept QName objects as path +Other changes +------------- + +* code cleanup: redundant _NodeBase super class merged into _Element class + Note: although the impact should be zero in most cases, this change breaks + the compatibiliy of the public C-API + 1.1.2 (2006-10-30) ================== Modified: lxml/trunk/src/lxml/apihelpers.pxi ============================================================================== --- lxml/trunk/src/lxml/apihelpers.pxi (original) +++ lxml/trunk/src/lxml/apihelpers.pxi Wed Feb 14 15:19:02 2007 @@ -10,19 +10,19 @@ c_child = c_child.next cdef _Document _documentOrRaise(object input): - """Call this to get the document of a _Document, _ElementTree or _NodeBase + """Call this to get the document of a _Document, _ElementTree or _Element object, or to raise an exception if it can't be determined. Should be used in all API functions for consistency. """ cdef _Document doc - cdef _NodeBase element + cdef _Element element if isinstance(input, _ElementTree): element = (<_ElementTree>input)._context_node if element is not None: doc = element._doc - elif isinstance(input, _NodeBase): - doc = (<_NodeBase>input)._doc + elif isinstance(input, _Element): + doc = (<_Element>input)._doc elif isinstance(input, _Document): doc = <_Document>input else: @@ -32,17 +32,17 @@ else: return doc -cdef _NodeBase _rootNodeOrRaise(object input): +cdef _Element _rootNodeOrRaise(object input): """Call this to get the root node of a _Document, _ElementTree or - _NodeBase object, or to raise an exception if it can't be determined. + _Element object, or to raise an exception if it can't be determined. 
Should be used in all API functions for consistency. """ - cdef _NodeBase node + cdef _Element node if isinstance(input, _ElementTree): node = (<_ElementTree>input)._context_node - elif isinstance(input, _NodeBase): - node = <_NodeBase>input + elif isinstance(input, _Element): + node = <_Element>input elif isinstance(input, _Document): node = (<_Document>input).getroot() else: @@ -54,27 +54,27 @@ cdef _Document _documentOf(object input): # call this to get the document of a - # _Document, _ElementTree or _NodeBase object + # _Document, _ElementTree or _Element object # may return None! - cdef _NodeBase element + cdef _Element element if isinstance(input, _ElementTree): element = (<_ElementTree>input)._context_node if element is not None: return element._doc - elif isinstance(input, _NodeBase): - return (<_NodeBase>input)._doc + elif isinstance(input, _Element): + return (<_Element>input)._doc elif isinstance(input, _Document): return <_Document>input return None -cdef _NodeBase _rootNodeOf(object input): +cdef _Element _rootNodeOf(object input): # call this to get the root node of a - # _Document, _ElementTree or _NodeBase object + # _Document, _ElementTree or _Element object # may return None! 
if isinstance(input, _ElementTree): return (<_ElementTree>input)._context_node - elif isinstance(input, _NodeBase): - return <_NodeBase>input + elif isinstance(input, _Element): + return <_Element>input elif isinstance(input, _Document): return (<_Document>input).getroot() else: @@ -176,7 +176,7 @@ tree.xmlFree(c_result) return result -cdef object _getAttributeValue(_NodeBase element, key, default): +cdef object _getAttributeValue(_Element element, key, default): cdef char* c_result cdef char* c_tag ns, tag = _getNsTag(key) @@ -192,7 +192,7 @@ tree.xmlFree(c_result) return result -cdef int _setAttributeValue(_NodeBase element, key, value) except -1: +cdef int _setAttributeValue(_Element element, key, value) except -1: cdef xmlNs* c_ns cdef char* c_value cdef char* c_tag @@ -207,7 +207,7 @@ tree.xmlSetNsProp(element._c_node, c_ns, c_tag, c_value) return 0 -cdef int _delAttribute(_NodeBase element, key) except -1: +cdef int _delAttribute(_Element element, key) except -1: cdef xmlAttr* c_attr cdef char* c_href ns, tag = _getNsTag(key) Modified: lxml/trunk/src/lxml/etree.pyx ============================================================================== --- lxml/trunk/src/lxml/etree.pyx (original) +++ lxml/trunk/src/lxml/etree.pyx Wed Feb 14 15:19:02 2007 @@ -358,6 +358,7 @@ result._parser = parser return result + cdef class DocInfo: "Document information provided by parser and DTD." cdef readonly object root_name @@ -391,287 +392,33 @@ else: return "" -cdef public class _NodeBase [ type LxmlNodeBaseType, - object LxmlNodeBase ]: - """Base class to reference a document object and a libxml node. + +cdef public class _Element [ type LxmlElementType, object LxmlElement ]: + """Element class. References a document object and a libxml node. By pointing to a Document instance, a reference is kept to _Document as long as there is some pointer to a node in it. 
""" cdef _Document _doc cdef xmlNode* _c_node - - def __dealloc__(self): - #print "trying to free node:", self._c_node - #displayNode(self._c_node, 0) - if self._c_node is not NULL: - unregisterProxy(self) - attemptDeallocation(self._c_node) - -cdef public class _ElementTree [ type LxmlElementTreeType, - object LxmlElementTree ]: - cdef _Document _doc - cdef _NodeBase _context_node - - # Note that _doc is only used to store the original document if we do not - # have a _context_node. All methods should prefer self._context_node._doc - # to honour tree restructuring. _doc can happily be None! - - cdef _assertHasRoot(self): - """We have to take care here: the document may not have a root node! - This can happen if ElementTree() is called without any argument and - the caller 'forgets' to call parse() afterwards, so this is a bug in - the caller program. - """ - assert self._context_node is not None, \ - "ElementTree not initialized, missing root" - - def parse(self, source, _BaseParser parser=None): - """Updates self with the content of source and returns its root - """ - cdef _Document doc - doc = _parseDocument(source, parser) - self._context_node = doc.getroot() - if self._context_node is None: - self._doc = doc - else: - self._doc = None - return self._context_node - - def getroot(self): - """Gets the root element for this tree. - """ - return self._context_node - - def __copy__(self): - return ElementTree(self._context_node) - - def __deepcopy__(self, memo): - if self._context_node is None: - return ElementTree() - else: - return ElementTree( self._context_node.__copy__() ) - - property docinfo: - """Information about the document provided by parser and DTD. This - value is only defined for ElementTree objects based on the root node - of a parsed document (e.g. those returned by the parse functions). 
- """ - def __get__(self): - self._assertHasRoot() - return DocInfo(self._context_node._doc) - - property parser: - """The parser that was used to parse the document in this ElementTree. - """ - def __get__(self): - if self._context_node is not None and \ - self._context_node._doc is not None: - return self._context_node._doc._parser - return None - - def write(self, file, encoding=None, - pretty_print=False, xml_declaration=None): - """Write the tree to a file or file-like object. - - Defaults to ASCII encoding and writing a declaration as needed. - """ - cdef int c_write_declaration - self._assertHasRoot() - # suppress decl. in default case (purely for ElementTree compatibility) - if xml_declaration is not None: - c_write_declaration = bool(xml_declaration) - if encoding is None: - encoding = 'ASCII' - elif encoding is None: - encoding = 'ASCII' - c_write_declaration = 0 - else: - encoding = encoding.upper() - c_write_declaration = encoding not in \ - ('US-ASCII', 'ASCII', 'UTF8', 'UTF-8') - _tofilelike(file, self._context_node, encoding, - c_write_declaration, bool(pretty_print)) - - def getpath(self, _NodeBase element not None): - """Returns a structural, absolute XPath expression to find that element. - """ - cdef _Document doc - cdef xmlDoc* c_doc - cdef char* c_path - doc = self._context_node._doc - if element._doc is not doc: - raise ValueError, "Element is not in this tree." - c_doc = _fakeRootDoc(doc._c_doc, self._context_node._c_node) - c_path = tree.xmlGetNodePath(element._c_node) - _destroyFakeDoc(doc._c_doc, c_doc) - if c_path is NULL: - raise LxmlError, "Error creating node path." - path = c_path - tree.xmlFree(c_path) - return path - - def getiterator(self, tag=None): - """Creates an iterator for the root element. The iterator loops over all elements - in this tree, in document order. 
- """ - root = self.getroot() - if root is None: - return () - return root.getiterator(tag) - - def find(self, path): - """Finds the first toplevel element with given tag. Same as getroot().find(path). - """ - self._assertHasRoot() - root = self.getroot() - if path[:1] == "/": - path = "." + path - return root.find(path) - - def findtext(self, path, default=None): - """Finds the element text for the first toplevel element with given tag. Same as getroot().findtext(path) - """ - self._assertHasRoot() - root = self.getroot() - if path[:1] == "/": - path = "." + path - return root.findtext(path, default) - - def findall(self, path): - """Finds all toplevel elements with the given tag. Same as getroot().findall(path). - """ - self._assertHasRoot() - root = self.getroot() - if path[:1] == "/": - path = "." + path - return root.findall(path) - - # extensions to ElementTree API - def xpath(self, _path, namespaces=None, extensions=None, **_variables): - """XPath evaluate in context of document. - - ``namespaces`` is an optional dictionary with prefix to namespace URI - mappings, used by XPath. ``extensions`` defines additional extension - functions. - - Returns a list (nodeset), or bool, float or string. - - In case of a list result, return Element for element nodes, - string for text and attribute values. - - Note: if you are going to apply multiple XPath expressions - against the same document, it is more efficient to use - XPathEvaluator directly. - """ - self._assertHasRoot() - evaluator = XPathDocumentEvaluator(self, namespaces, extensions) - return evaluator.evaluate(_path, **_variables) - - def xslt(self, _xslt, extensions=None, access_control=None, **_kw): - """Transform this document using other document. - - xslt is a tree that should be XSLT - keyword parameters are XSLT transformation parameters. - - Returns the transformed tree. 
- - Note: if you are going to apply the same XSLT stylesheet against - multiple documents, it is more efficient to use the XSLT - class directly. - """ - self._assertHasRoot() - style = XSLT(_xslt, extensions=extensions, - access_control=access_control) - return style(self, **_kw) - - def relaxng(self, relaxng): - """Validate this document using other document. - - relaxng is a tree that should contain Relax NG XML - - Returns True or False, depending on whether validation - succeeded. - - Note: if you are going to apply the same Relax NG schema against - multiple documents, it is more efficient to use the RelaxNG - class directly. - """ - self._assertHasRoot() - schema = RelaxNG(relaxng) - return schema.validate(self) - - def xmlschema(self, xmlschema): - """Validate this document using other document. - - xmlschema is a tree that should contain XML Schema XML. - - Returns True or False, depending on whether validation - succeeded. - - Note: If you are going to apply the same XML Schema against - multiple documents, it is more efficient to use the XMLSchema - class directly. - """ - self._assertHasRoot() - schema = XMLSchema(xmlschema) - return schema.validate(self) - - def xinclude(self): - """Process the XInclude nodes in this document and include the - referenced XML fragments. - """ - cdef int result - # We cannot pass the XML_PARSE_NOXINCNODE option as this would free - # the XInclude nodes - there may still be Python references to them! - # Therefore, we allow XInclude nodes to be converted to - # XML_XINCLUDE_START nodes. XML_XINCLUDE_END nodes are added as - # siblings. Tree traversal will simply ignore them as they are not - # typed as elements. The included fragment is added between the two, - # i.e. as a sibling, which does not conflict with traversal. 
- self._assertHasRoot() - if self._context_node._doc._parser != None: - result = xinclude.xmlXIncludeProcessTreeFlags( - self._context_node._c_node, - self._context_node._doc._parser._parse_options) - else: - result = xinclude.xmlXIncludeProcessTree( - self._context_node._c_node) - if result == -1: - raise XIncludeError, "XInclude processing failed" - - def write_c14n(self, file): - """C14N write of document. Always writes UTF-8. - """ - self._assertHasRoot() - _tofilelikeC14N(file, self._context_node) - -cdef _ElementTree _elementTreeFactory(_Document doc, _NodeBase context_node): - return _newElementTree(doc, context_node, _ElementTree) - -cdef _ElementTree _newElementTree(_Document doc, _NodeBase context_node, - object baseclass): - cdef _ElementTree result - result = baseclass() - if context_node is None and doc is not None: - context_node = doc.getroot() - if context_node is None: - result._doc = doc - result._context_node = context_node - return result - -cdef public class _Element(_NodeBase) [ type LxmlElementType, - object LxmlElement ]: cdef object _tag cdef object _attrib + def _init(self): """Called after object initialisation. Custom subclasses may override this if they recursively call _init() in the superclasses. """ + def __dealloc__(self): + #print "trying to free node:", self._c_node + #displayNode(self._c_node, 0) + if self._c_node is not NULL: + unregisterProxy(self) + attemptDeallocation(self._c_node) + # MANIPULATORS - def __setitem__(self, Py_ssize_t index, _NodeBase element not None): + def __setitem__(self, Py_ssize_t index, _Element element not None): """Replaces the given subelement. 
""" cdef xmlNode* c_node @@ -709,7 +456,7 @@ """ cdef xmlNode* c_node cdef xmlNode* c_next - cdef _Element mynode + cdef _Element element # first, find start of slice if start == python.PY_SSIZE_T_MAX: c_node = NULL @@ -724,18 +471,18 @@ _appendChild(self, element) return # if the next element is in the list, insert before it - for mynode in value: - if mynode is None: + for element in value: + if element is None: raise TypeError, "Node must not be None." # store possible text tail - c_next = mynode._c_node.next + c_next = element._c_node.next # now move node previous to insertion point - tree.xmlUnlinkNode(mynode._c_node) - tree.xmlAddPrevSibling(c_node, mynode._c_node) + tree.xmlUnlinkNode(element._c_node) + tree.xmlAddPrevSibling(c_node, element._c_node) # and move tail just behind his node - _moveTail(c_next, mynode._c_node) + _moveTail(c_next, element._c_node) # move it into a new document - moveNodeToDocument(mynode, self._doc) + moveNodeToDocument(element, self._doc) def __deepcopy__(self, memo): return self.__copy__() @@ -987,9 +734,9 @@ def __contains__(self, element): cdef xmlNode* c_node - if not isinstance(element, _NodeBase): + if not isinstance(element, _Element): return 0 - c_node = (<_NodeBase>element)._c_node + c_node = (<_Element>element)._c_node return c_node is not NULL and c_node.parent is self._c_node def __iter__(self): @@ -1235,6 +982,7 @@ result._init() return result + cdef class __ContentOnlyElement(_Element): cdef int _raiseImmutable(self) except -1: raise TypeError, "this element does not have children or attributes" @@ -1321,9 +1069,263 @@ else: return "" % self.target + +cdef public class _ElementTree [ type LxmlElementTreeType, + object LxmlElementTree ]: + cdef _Document _doc + cdef _Element _context_node + + # Note that _doc is only used to store the original document if we do not + # have a _context_node. All methods should prefer self._context_node._doc + # to honour tree restructuring. _doc can happily be None! 
+ + cdef _assertHasRoot(self): + """We have to take care here: the document may not have a root node! + This can happen if ElementTree() is called without any argument and + the caller 'forgets' to call parse() afterwards, so this is a bug in + the caller program. + """ + assert self._context_node is not None, \ + "ElementTree not initialized, missing root" + + def parse(self, source, _BaseParser parser=None): + """Updates self with the content of source and returns its root + """ + cdef _Document doc + doc = _parseDocument(source, parser) + self._context_node = doc.getroot() + if self._context_node is None: + self._doc = doc + else: + self._doc = None + return self._context_node + + def getroot(self): + """Gets the root element for this tree. + """ + return self._context_node + + def __copy__(self): + return ElementTree(self._context_node) + + def __deepcopy__(self, memo): + if self._context_node is None: + return ElementTree() + else: + return ElementTree( self._context_node.__copy__() ) + + property docinfo: + """Information about the document provided by parser and DTD. This + value is only defined for ElementTree objects based on the root node + of a parsed document (e.g. those returned by the parse functions). + """ + def __get__(self): + self._assertHasRoot() + return DocInfo(self._context_node._doc) + + property parser: + """The parser that was used to parse the document in this ElementTree. + """ + def __get__(self): + if self._context_node is not None and \ + self._context_node._doc is not None: + return self._context_node._doc._parser + return None + + def write(self, file, encoding=None, + pretty_print=False, xml_declaration=None): + """Write the tree to a file or file-like object. + + Defaults to ASCII encoding and writing a declaration as needed. + """ + cdef int c_write_declaration + self._assertHasRoot() + # suppress decl. 
in default case (purely for ElementTree compatibility) + if xml_declaration is not None: + c_write_declaration = bool(xml_declaration) + if encoding is None: + encoding = 'ASCII' + elif encoding is None: + encoding = 'ASCII' + c_write_declaration = 0 + else: + encoding = encoding.upper() + c_write_declaration = encoding not in \ + ('US-ASCII', 'ASCII', 'UTF8', 'UTF-8') + _tofilelike(file, self._context_node, encoding, + c_write_declaration, bool(pretty_print)) + + def getpath(self, _Element element not None): + """Returns a structural, absolute XPath expression to find that element. + """ + cdef _Document doc + cdef xmlDoc* c_doc + cdef char* c_path + doc = self._context_node._doc + if element._doc is not doc: + raise ValueError, "Element is not in this tree." + c_doc = _fakeRootDoc(doc._c_doc, self._context_node._c_node) + c_path = tree.xmlGetNodePath(element._c_node) + _destroyFakeDoc(doc._c_doc, c_doc) + if c_path is NULL: + raise LxmlError, "Error creating node path." + path = c_path + tree.xmlFree(c_path) + return path + + def getiterator(self, tag=None): + """Creates an iterator for the root element. The iterator loops over all elements + in this tree, in document order. + """ + root = self.getroot() + if root is None: + return () + return root.getiterator(tag) + + def find(self, path): + """Finds the first toplevel element with given tag. Same as getroot().find(path). + """ + self._assertHasRoot() + root = self.getroot() + if path[:1] == "/": + path = "." + path + return root.find(path) + + def findtext(self, path, default=None): + """Finds the element text for the first toplevel element with given tag. Same as getroot().findtext(path) + """ + self._assertHasRoot() + root = self.getroot() + if path[:1] == "/": + path = "." + path + return root.findtext(path, default) + + def findall(self, path): + """Finds all toplevel elements with the given tag. Same as getroot().findall(path). 
+ """ + self._assertHasRoot() + root = self.getroot() + if path[:1] == "/": + path = "." + path + return root.findall(path) + + # extensions to ElementTree API + def xpath(self, _path, namespaces=None, extensions=None, **_variables): + """XPath evaluate in context of document. + + ``namespaces`` is an optional dictionary with prefix to namespace URI + mappings, used by XPath. ``extensions`` defines additional extension + functions. + + Returns a list (nodeset), or bool, float or string. + + In case of a list result, return Element for element nodes, + string for text and attribute values. + + Note: if you are going to apply multiple XPath expressions + against the same document, it is more efficient to use + XPathEvaluator directly. + """ + self._assertHasRoot() + evaluator = XPathDocumentEvaluator(self, namespaces, extensions) + return evaluator.evaluate(_path, **_variables) + + def xslt(self, _xslt, extensions=None, access_control=None, **_kw): + """Transform this document using other document. + + xslt is a tree that should be XSLT + keyword parameters are XSLT transformation parameters. + + Returns the transformed tree. + + Note: if you are going to apply the same XSLT stylesheet against + multiple documents, it is more efficient to use the XSLT + class directly. + """ + self._assertHasRoot() + style = XSLT(_xslt, extensions=extensions, + access_control=access_control) + return style(self, **_kw) + + def relaxng(self, relaxng): + """Validate this document using other document. + + relaxng is a tree that should contain Relax NG XML + + Returns True or False, depending on whether validation + succeeded. + + Note: if you are going to apply the same Relax NG schema against + multiple documents, it is more efficient to use the RelaxNG + class directly. + """ + self._assertHasRoot() + schema = RelaxNG(relaxng) + return schema.validate(self) + + def xmlschema(self, xmlschema): + """Validate this document using other document. 
+ + xmlschema is a tree that should contain XML Schema XML. + + Returns True or False, depending on whether validation + succeeded. + + Note: If you are going to apply the same XML Schema against + multiple documents, it is more efficient to use the XMLSchema + class directly. + """ + self._assertHasRoot() + schema = XMLSchema(xmlschema) + return schema.validate(self) + + def xinclude(self): + """Process the XInclude nodes in this document and include the + referenced XML fragments. + """ + cdef int result + # We cannot pass the XML_PARSE_NOXINCNODE option as this would free + # the XInclude nodes - there may still be Python references to them! + # Therefore, we allow XInclude nodes to be converted to + # XML_XINCLUDE_START nodes. XML_XINCLUDE_END nodes are added as + # siblings. Tree traversal will simply ignore them as they are not + # typed as elements. The included fragment is added between the two, + # i.e. as a sibling, which does not conflict with traversal. + self._assertHasRoot() + if self._context_node._doc._parser != None: + result = xinclude.xmlXIncludeProcessTreeFlags( + self._context_node._c_node, + self._context_node._doc._parser._parse_options) + else: + result = xinclude.xmlXIncludeProcessTree( + self._context_node._c_node) + if result == -1: + raise XIncludeError, "XInclude processing failed" + + def write_c14n(self, file): + """C14N write of document. Always writes UTF-8. 
+ """ + self._assertHasRoot() + _tofilelikeC14N(file, self._context_node) + +cdef _ElementTree _elementTreeFactory(_Document doc, _Element context_node): + return _newElementTree(doc, context_node, _ElementTree) + +cdef _ElementTree _newElementTree(_Document doc, _Element context_node, + object baseclass): + cdef _ElementTree result + result = baseclass() + if context_node is None and doc is not None: + context_node = doc.getroot() + if context_node is None: + result._doc = doc + result._context_node = context_node + return result + + cdef class _Attrib: - cdef _NodeBase _element - def __init__(self, _NodeBase element not None): + cdef _Element _element + def __init__(self, _Element element not None): self._element = element # MANIPULATORS @@ -1479,12 +1481,12 @@ cdef public class _ElementIterator(_ElementTagMatcher) [ object LxmlElementIterator, type LxmlElementIteratorType ]: # we keep Python references here to control GC - cdef _NodeBase _node + cdef _Element _node cdef _node_to_node_function _next_element def __iter__(self): return self - cdef void _storeNext(self, _NodeBase node): + cdef void _storeNext(self, _Element node): cdef xmlNode* c_node c_node = self._next_element(node._c_node) while c_node is not NULL and \ @@ -1498,7 +1500,7 @@ def __next__(self): cdef xmlNode* c_node - cdef _NodeBase current_node + cdef _Element current_node # Python ref: current_node = self._node if current_node is None: @@ -1508,7 +1510,7 @@ cdef class ElementChildIterator(_ElementIterator): "Iterates over the children of an element." - def __init__(self, _NodeBase node not None, reversed=False, tag=None): + def __init__(self, _Element node not None, reversed=False, tag=None): cdef xmlNode* c_node self._initTagMatch(tag) if reversed: @@ -1530,7 +1532,7 @@ You can pass the boolean keyword ``preceding`` to specify the direction. 
""" - def __init__(self, _NodeBase node not None, preceding=False, tag=None): + def __init__(self, _Element node not None, preceding=False, tag=None): self._initTagMatch(tag) if preceding: self._next_element = _previousElement @@ -1540,7 +1542,7 @@ cdef class AncestorsIterator(_ElementIterator): "Iterates over the ancestors of an element (from parent to parent)." - def __init__(self, _NodeBase node not None, tag=None): + def __init__(self, _Element node not None, tag=None): self._initTagMatch(tag) self._next_element = _parentElement self._storeNext(node) @@ -1560,9 +1562,9 @@ """ # we keep Python references here to control GC # keep next node to return and a depth counter in the tree - cdef _NodeBase _next_node - cdef _NodeBase _top_node - def __init__(self, _NodeBase node not None, tag=None, inclusive=True): + cdef _Element _next_node + cdef _Element _top_node + def __init__(self, _Element node not None, tag=None, inclusive=True): self._top_node = node self._next_node = node self._initTagMatch(tag) @@ -1577,7 +1579,7 @@ def __next__(self): cdef xmlNode* c_node - cdef _NodeBase current_node + cdef _Element current_node current_node = self._next_node if current_node is None: raise StopIteration @@ -1729,7 +1731,7 @@ """ return isinstance(element, _Element) -def dump(_NodeBase elem not None, pretty_print=True): +def dump(_Element elem not None, pretty_print=True): """Writes an element tree or element structure to sys.stdout. This function should be used for debugging only. 
""" @@ -1761,8 +1763,8 @@ else: write_declaration = bool(xml_declaration) - if isinstance(element_or_tree, _NodeBase): - return _tostring(<_NodeBase>element_or_tree, + if isinstance(element_or_tree, _Element): + return _tostring(<_Element>element_or_tree, encoding, write_declaration, c_pretty_print) elif isinstance(element_or_tree, _ElementTree): return _tostring((<_ElementTree>element_or_tree)._context_node, @@ -1782,8 +1784,8 @@ """ cdef int c_pretty_print c_pretty_print = bool(pretty_print) - if isinstance(element_or_tree, _NodeBase): - return _tounicode(<_NodeBase>element_or_tree, c_pretty_print) + if isinstance(element_or_tree, _Element): + return _tounicode(<_Element>element_or_tree, c_pretty_print) elif isinstance(element_or_tree, _ElementTree): return _tounicode((<_ElementTree>element_or_tree)._context_node, c_pretty_print) Modified: lxml/trunk/src/lxml/etreepublic.pxd ============================================================================== --- lxml/trunk/src/lxml/etreepublic.pxd (original) +++ lxml/trunk/src/lxml/etreepublic.pxd Wed Feb 14 15:19:02 2007 @@ -27,13 +27,10 @@ cdef class lxml.etree._Document [ object LxmlDocument ]: cdef tree.xmlDoc* _c_doc - cdef class lxml.etree._NodeBase [ object LxmlNodeBase ]: + cdef class lxml.etree._Element [ object LxmlElement ]: cdef _Document _doc cdef tree.xmlNode* _c_node - cdef class lxml.etree._Element(_NodeBase) [ object LxmlElement ]: - pass - cdef class lxml.etree.ElementBase(_Element) [ object LxmlElementBase ]: pass @@ -56,10 +53,10 @@ cdef _Element elementFactory(_Document doc, tree.xmlNode* c_node) # create an ElementTree for an Element - cdef _ElementTree elementTreeFactory(_NodeBase context_node) + cdef _ElementTree elementTreeFactory(_Element context_node) # create an ElementTree subclass for an Element - cdef _ElementTree newElementTree(_NodeBase context_node, object subclass) + cdef _ElementTree newElementTree(_Element context_node, object subclass) # create a new Element for an existing or new 
document (doc = None) # builds Python object after setting text, tail, namespaces and attributes @@ -101,7 +98,7 @@ char* c_ns, char* c_name) # return the value of attribute "{ns}name", or the default value - cdef object getAttributeValue(_NodeBase element, key, default) + cdef object getAttributeValue(_Element element, key, default) # return an iterator over attribute names (1), values (2) or items (3) # attributes must not be removed during iteration! @@ -109,11 +106,11 @@ # set an attribute value on an element # on failure, sets an exception and returns -1 - cdef int setAttributeValue(_NodeBase element, key, value) except -1 + cdef int setAttributeValue(_Element element, key, value) except -1 # delete an attribute # on failure, sets an exception and returns -1 - cdef int delAttribute(_NodeBase element, key) except -1 + cdef int delAttribute(_Element element, key) except -1 # delete an attribute based on name and namespace URI # returns -1 if the attribute was not found (no exception) @@ -153,12 +150,12 @@ cdef class lxml.etree._ElementIterator(_ElementTagMatcher) [ object LxmlElementIterator ]: - cdef _NodeBase _node + cdef _Element _node cdef tree.xmlNode* (*_next_element)(tree.xmlNode*) # store the initial node of the iterator if it matches the required tag # or its next matching sibling if not - cdef void iteratorStoreNext(_ElementIterator iterator, _NodeBase node) + cdef void iteratorStoreNext(_ElementIterator iterator, _Element node) ########################################################################## # other helper functions @@ -205,4 +202,4 @@ cdef _Document documentOrRaise(object input) # find the root Element of an Element (itself!), ElementTree or Document - cdef _NodeBase rootNodeOrRaise(object input) + cdef _Element rootNodeOrRaise(object input) Modified: lxml/trunk/src/lxml/extensions.pxi ============================================================================== --- lxml/trunk/src/lxml/extensions.pxi (original) +++ 
lxml/trunk/src/lxml/extensions.pxi Wed Feb 14 15:19:02 2007 @@ -196,16 +196,16 @@ functions would be reference counted too soon, during the XPath evaluation. This is most important in the case of exceptions. """ - cdef _NodeBase element - if isinstance(obj, _NodeBase): + cdef _Element element + if isinstance(obj, _Element): self._temp_refs.add(obj) - self._temp_refs.add((<_NodeBase>obj)._doc) + self._temp_refs.add((<_Element>obj)._doc) return elif _isString(obj) or not python.PySequence_Check(obj): return for o in obj: - if isinstance(o, _NodeBase): - element = <_NodeBase>o + if isinstance(o, _Element): + element = <_Element>o #print "Holding element:", element._c_node self._temp_refs.add(element) #print "Holding document:", element._doc._c_doc @@ -245,7 +245,7 @@ cdef xpath.xmlXPathObject* _wrapXPathObject(object obj) except NULL: cdef xpath.xmlNodeSet* resultSet - cdef _NodeBase node + cdef _Element node if python.PyUnicode_Check(obj): obj = _utf8(obj) if python.PyString_Check(obj): @@ -256,13 +256,13 @@ return xpath.xmlXPathNewFloat(obj) if obj is None: resultSet = xpath.xmlXPathNodeSetCreate(NULL) - elif isinstance(obj, _NodeBase): - resultSet = xpath.xmlXPathNodeSetCreate((<_NodeBase>obj)._c_node) + elif isinstance(obj, _Element): + resultSet = xpath.xmlXPathNodeSetCreate((<_Element>obj)._c_node) elif python.PySequence_Check(obj): resultSet = xpath.xmlXPathNodeSetCreate(NULL) for element in obj: - if isinstance(element, _NodeBase): - node = <_NodeBase>element + if isinstance(element, _Element): + node = <_Element>element xpath.xmlXPathNodeSetAdd(resultSet, node._c_node) else: xpath.xmlXPathFreeNodeSet(resultSet) @@ -356,7 +356,7 @@ cdef void _extension_function_call(_BaseContext context, function, xpath.xmlXPathParserContext* ctxt, int nargs): - cdef _NodeBase node + cdef _Element node cdef _Document doc cdef xpath.xmlXPathObject* obj cdef int i Modified: lxml/trunk/src/lxml/iterparse.pxi 
============================================================================== --- lxml/trunk/src/lxml/iterparse.pxi (original) +++ lxml/trunk/src/lxml/iterparse.pxi Wed Feb 14 15:19:02 2007 @@ -330,7 +330,7 @@ cdef char* _tag_name def __init__(self, element_or_tree, events=("end",), tag=None): - cdef _NodeBase root + cdef _Element root cdef int ns_count root = _rootNodeOrRaise(element_or_tree) self._event_filter = _buildIterparseEventFilter(events) @@ -369,8 +369,8 @@ return self def __next__(self): - cdef _NodeBase node - cdef _NodeBase next_node + cdef _Element node + cdef _Element next_node cdef int ns_count if python.PyList_GET_SIZE(self._events): return self._pop_event(0) @@ -406,7 +406,7 @@ return self._pop_event(0) raise StopIteration - cdef int _start_node(self, _NodeBase node): + cdef int _start_node(self, _Element node): cdef int ns_count if self._event_filter & ITERPARSE_FILTER_START_NS: ns_count = _appendStartNsEvents(node._c_node, self._events) @@ -420,8 +420,8 @@ python.PyList_Append(self._events, ("start", node)) return ns_count - cdef _NodeBase _end_node(self): - cdef _NodeBase node + cdef _Element _end_node(self): + cdef _Element node node, ns_count = self._pop_node() if self._event_filter & ITERPARSE_FILTER_END: if self._tag_tuple is None or \ Modified: lxml/trunk/src/lxml/proxy.pxi ============================================================================== --- lxml/trunk/src/lxml/proxy.pxi (original) +++ lxml/trunk/src/lxml/proxy.pxi Wed Feb 14 15:19:02 2007 @@ -4,19 +4,19 @@ # structure of the respective node to avoid multiple instantiation of # the Python class -cdef _NodeBase getProxy(xmlNode* c_node): +cdef _Element getProxy(xmlNode* c_node): """Get a proxy for a given node. 
""" #print "getProxy for:", c_node if c_node is not NULL and c_node._private is not NULL: - return <_NodeBase>c_node._private + return <_Element>c_node._private else: return None cdef int hasProxy(xmlNode* c_node): return c_node._private is not NULL -cdef registerProxy(_NodeBase proxy): +cdef registerProxy(_Element proxy): """Register a proxy and type for the node it's proxying for. """ cdef xmlNode* c_node @@ -28,7 +28,7 @@ assert c_node._private is NULL, "double registering proxy!" c_node._private = proxy -cdef unregisterProxy(_NodeBase proxy): +cdef unregisterProxy(_Element proxy): """Unregister a proxy for the node it's proxying for. """ cdef xmlNode* c_node @@ -154,14 +154,14 @@ c_node = c_doc.children tree.BEGIN_FOR_EACH_ELEMENT_FROM(c_doc, c_node, 1) if c_node._private is not NULL: - (<_NodeBase>c_node._private)._c_node = NULL + (<_Element>c_node._private)._c_node = NULL tree.END_FOR_EACH_ELEMENT_FROM(c_node) tree.xmlFreeDoc(c_doc) ################################################################################ # change _Document references when a node changes documents -cdef void moveNodeToDocument(_NodeBase node, _Document doc): +cdef void moveNodeToDocument(_Element node, _Document doc): """For a node and all nodes below, change document. 
A node can change document in certain operations as an XML @@ -185,5 +185,5 @@ c_node = c_parent.children tree.BEGIN_FOR_EACH_ELEMENT_FROM(c_parent, c_node, 1) if c_node._private is not NULL: - (<_NodeBase>c_node._private)._doc = doc + (<_Element>c_node._private)._doc = doc tree.END_FOR_EACH_ELEMENT_FROM(c_node) Modified: lxml/trunk/src/lxml/public-api.pxi ============================================================================== --- lxml/trunk/src/lxml/public-api.pxi (original) +++ lxml/trunk/src/lxml/public-api.pxi Wed Feb 14 15:19:02 2007 @@ -6,10 +6,10 @@ c_node = _copyNodeToDoc(c_root, doc._c_doc) return _elementFactory(doc, c_node) -cdef public _ElementTree elementTreeFactory(_NodeBase context_node): +cdef public _ElementTree elementTreeFactory(_Element context_node): return newElementTree(context_node, _ElementTree) -cdef public _ElementTree newElementTree(_NodeBase context_node, +cdef public _ElementTree newElementTree(_Element context_node, object subclass): if context_node is NULL or context_node is None: raise TypeError @@ -47,7 +47,7 @@ cdef public _Document documentOrRaise(object input): return _documentOrRaise(input) -cdef public _NodeBase rootNodeOrRaise(object input): +cdef public _Element rootNodeOrRaise(object input): return _rootNodeOrRaise(input) cdef public object textOf(xmlNode* c_node): @@ -77,16 +77,16 @@ char* ns, char* name): return _attributeValueFromNsName(c_element, ns, name) -cdef public object getAttributeValue(_NodeBase element, key, default): +cdef public object getAttributeValue(_Element element, key, default): return _getAttributeValue(element, key, default) cdef public object iterattributes(_Element element, int keysvalues): return _attributeIteratorFactory(element, keysvalues) -cdef public int setAttributeValue(_NodeBase element, key, value) except -1: +cdef public int setAttributeValue(_Element element, key, value) except -1: return _setAttributeValue(element, key, value) -cdef public int delAttribute(_NodeBase element, 
key) except -1: +cdef public int delAttribute(_Element element, key) except -1: return _delAttribute(element, key) cdef public int delAttributeFromNsName(tree.xmlNode* c_element, @@ -128,7 +128,7 @@ cdef public object namespacedNameFromNsName(char* href, char* name): return _namespacedNameFromNsName(href, name) -cdef public void iteratorStoreNext(_ElementIterator iterator, _NodeBase node): +cdef public void iteratorStoreNext(_ElementIterator iterator, _Element node): iterator._storeNext(node) cdef public void initTagMatch(_ElementTagMatcher matcher, tag): Modified: lxml/trunk/src/lxml/relaxng.pxi ============================================================================== --- lxml/trunk/src/lxml/relaxng.pxi (original) +++ lxml/trunk/src/lxml/relaxng.pxi Wed Feb 14 15:19:02 2007 @@ -20,7 +20,7 @@ cdef relaxng.xmlRelaxNG* _c_schema def __init__(self, etree=None, file=None): cdef _Document doc - cdef _NodeBase root_node + cdef _Element root_node cdef xmlNode* c_node cdef xmlDoc* fake_c_doc cdef char* c_href @@ -76,7 +76,7 @@ Returns true if document is valid, false if not.""" cdef python.PyThreadState* state cdef _Document doc - cdef _NodeBase root_node + cdef _Element root_node cdef xmlDoc* c_doc cdef relaxng.xmlRelaxNGValidCtxt* valid_ctxt cdef int ret Modified: lxml/trunk/src/lxml/serializer.pxi ============================================================================== --- lxml/trunk/src/lxml/serializer.pxi (original) +++ lxml/trunk/src/lxml/serializer.pxi Wed Feb 14 15:19:02 2007 @@ -1,6 +1,6 @@ # XML serialization and output functions -cdef _tostring(_NodeBase element, encoding, +cdef _tostring(_Element element, encoding, int write_xml_declaration, int pretty_print): "Serialize an element to an encoded string representation of its XML tree." 
cdef python.PyThreadState* state @@ -43,7 +43,7 @@ tree.xmlOutputBufferClose(c_buffer) return result -cdef _tounicode(_NodeBase element, int pretty_print): +cdef _tounicode(_Element element, int pretty_print): "Serialize an element to the Python unicode representation of its XML tree." cdef python.PyThreadState* state cdef tree.xmlOutputBuffer* c_buffer @@ -146,7 +146,7 @@ cdef int _closeFilelikeWriter(void* ctxt): return (<_FilelikeWriter>ctxt).close() -cdef _tofilelike(f, _NodeBase element, encoding, +cdef _tofilelike(f, _Element element, encoding, int write_xml_declaration, int pretty_print): cdef python.PyThreadState* state cdef _FilelikeWriter writer @@ -185,7 +185,7 @@ else: writer._exc_context._raise_if_stored() -cdef _tofilelikeC14N(f, _NodeBase element): +cdef _tofilelikeC14N(f, _Element element): cdef python.PyThreadState* state cdef _FilelikeWriter writer cdef tree.xmlOutputBuffer* c_buffer Modified: lxml/trunk/src/lxml/xmlid.pxi ============================================================================== --- lxml/trunk/src/lxml/xmlid.pxi (original) +++ lxml/trunk/src/lxml/xmlid.pxi Wed Feb 14 15:19:02 2007 @@ -22,7 +22,7 @@ Note that you must not modify the XML tree if you use the ID dictionary. The results are undefined. 
""" - cdef _NodeBase root + cdef _Element root root = XML(text) # xml:id spec compatible implementation: use DTD ID attributes from libxml2 if root._doc._c_doc.ids is NULL: Modified: lxml/trunk/src/lxml/xmlschema.pxi ============================================================================== --- lxml/trunk/src/lxml/xmlschema.pxi (original) +++ lxml/trunk/src/lxml/xmlschema.pxi Wed Feb 14 15:19:02 2007 @@ -19,7 +19,7 @@ cdef xmlschema.xmlSchema* _c_schema def __init__(self, etree=None, file=None): cdef _Document doc - cdef _NodeBase root_node + cdef _Element root_node cdef xmlDoc* fake_c_doc cdef xmlNode* c_node cdef char* c_href @@ -72,7 +72,7 @@ cdef python.PyThreadState* state cdef xmlschema.xmlSchemaValidCtxt* valid_ctxt cdef _Document doc - cdef _NodeBase root_node + cdef _Element root_node cdef xmlDoc* c_doc cdef int ret Modified: lxml/trunk/src/lxml/xpath.pxi ============================================================================== --- lxml/trunk/src/lxml/xpath.pxi (original) +++ lxml/trunk/src/lxml/xpath.pxi Wed Feb 14 15:19:02 2007 @@ -122,7 +122,7 @@ XPath evaluators must not be shared between threads. 
""" cdef _Element _element - def __init__(self, _NodeBase element not None, namespaces=None, extensions=None): + def __init__(self, _Element element not None, namespaces=None, extensions=None): cdef xpath.xmlXPathContext* xpathCtxt cdef int ns_register_status cdef _Document doc @@ -253,7 +253,7 @@ cdef xpath.xmlXPathContext* xpathCtxt cdef xpath.xmlXPathObject* xpathObj cdef _Document document - cdef _NodeBase element + cdef _Element element cdef _XPathContext context document = _documentOrRaise(_etree_or_element) Modified: lxml/trunk/src/lxml/xslt.pxi ============================================================================== --- lxml/trunk/src/lxml/xslt.pxi (original) +++ lxml/trunk/src/lxml/xslt.pxi Wed Feb 14 15:19:02 2007 @@ -279,7 +279,7 @@ cdef xmlDoc* c_doc cdef xmlDoc* fake_c_doc cdef _Document doc - cdef _NodeBase root_node + cdef _Element root_node doc = _documentOrRaise(xslt_input) root_node = _rootNodeOrRaise(xslt_input) @@ -341,7 +341,7 @@ cdef python.PyThreadState* state cdef _XSLTContext context cdef _Document input_doc - cdef _NodeBase root_node + cdef _Element root_node cdef _Document result_doc cdef _Document profile_doc cdef xmlDoc* c_profile_doc From scoder at codespeak.net Thu Feb 15 09:30:34 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Thu, 15 Feb 2007 09:30:34 +0100 (CET) Subject: [Lxml-checkins] r38871 - in lxml/trunk: . 
src/lxml src/lxml/tests Message-ID: <20070215083034.9848E10079@code0.codespeak.net> Author: scoder Date: Thu Feb 15 09:30:32 2007 New Revision: 38871 Modified: lxml/trunk/CHANGES.txt lxml/trunk/src/lxml/etree.pyx lxml/trunk/src/lxml/tests/test_elementtree.py Log: rich comparison of QName objects Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Thu Feb 15 09:30:32 2007 @@ -8,6 +8,8 @@ Features added -------------- +* Rich comparison of QName objects + * Support for regular expressions in benchmark selection * get/set emulation (not .attrib!) for attributes on processing instructions Modified: lxml/trunk/src/lxml/etree.pyx ============================================================================== --- lxml/trunk/src/lxml/etree.pyx (original) +++ lxml/trunk/src/lxml/etree.pyx Thu Feb 15 09:30:32 2007 @@ -210,6 +210,9 @@ return self.text def __hash__(self): return self.text.__hash__() + def __richcmp__(one, other, int op): + return python.PyObject_RichCompare( + str(one), str(other), op) # forward declaration of _BaseParser, see parser.pxi Modified: lxml/trunk/src/lxml/tests/test_elementtree.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_elementtree.py (original) +++ lxml/trunk/src/lxml/tests/test_elementtree.py Thu Feb 15 09:30:32 2007 @@ -2320,6 +2320,15 @@ self.assertEquals(a1.tag, "{myns}a") self.assertEquals(a2.tag, "{myns}a") + def test_qname_cmp(self): + etree = self.etree + qname1 = etree.QName('myns', 'a') + qname2 = etree.QName('myns', 'a') + self.assertEquals(qname1, "{myns}a") + self.assertEquals("{myns}a", qname2) + self.assertEquals(qname1, qname1) + self.assertEquals(qname1, qname2) + def _writeElement(self, element, encoding='us-ascii'): """Write out element for comparison. 
""" From scoder at codespeak.net Wed Feb 14 15:20:33 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Wed, 14 Feb 2007 15:20:33 +0100 (CET) Subject: [Lxml-checkins] r38837 - in lxml/pyrex: . Doc Pyrex/Compiler Pyrex/Mac Message-ID: <20070214142033.178E110089@code0.codespeak.net> Author: scoder Date: Wed Feb 14 15:20:32 2007 New Revision: 38837 Added: lxml/pyrex/Makefile lxml/pyrex/Pyrex/Compiler/ModuleNode.py Modified: lxml/pyrex/CHANGES.txt lxml/pyrex/Doc/extension_types.html lxml/pyrex/Doc/overview.html lxml/pyrex/Doc/sharing.html lxml/pyrex/Doc/special_methods.html lxml/pyrex/Pyrex/Compiler/Code.py lxml/pyrex/Pyrex/Compiler/ExprNodes.py lxml/pyrex/Pyrex/Compiler/Nodes.py lxml/pyrex/Pyrex/Compiler/Parsing.py lxml/pyrex/Pyrex/Compiler/PyrexTypes.py lxml/pyrex/Pyrex/Compiler/Symtab.py lxml/pyrex/Pyrex/Compiler/Version.py lxml/pyrex/Pyrex/Mac/DarwinSystem.py lxml/pyrex/ToDo.txt lxml/pyrex/setup.py Log: updated to 0.9.5.1a (+ enum work around) Modified: lxml/pyrex/CHANGES.txt ============================================================================== --- lxml/pyrex/CHANGES.txt (original) +++ lxml/pyrex/CHANGES.txt Wed Feb 14 15:20:32 2007 @@ -1,3 +1,139 @@ +0.9.5.1a +-------- + +Bug fixes: + + - Package list now calculated dynamically in setup.py so that + it will work with or without the testing framework installed. + + +0.9.5.1 +------- + +Bug fixes: + + - Comparing two values of the same enum type incorrectly + produced an error. [Anders Gustafsson ] + + - Compiler crash caused by assigning a Python value to + a variable of an enum type. [Peter Johnson ] + + - Comparison between pointer and array incorrectly produced + a type mismatch error. + [Helmut Jarausch ] + + - Unused local Python variable had spurious init/cleanup code + generated for it, causing C compilation errors. + [Helmut Jarausch ] + + - Updated list of packages in setup.py. 
+ +Modifications: + + - NULL in Pyrex source now translated into NULL instead of 0 + in C code, to allow for the possibility of calling something + not defined with a prototype in an external header. + [Adapted Cat ] + + +0.9.5 +----- + +Enhancements: + + - Exception return values may now be specified by arbitrary + constant expressions of appropriate type, not just literals. + [Stefan Behnel ] + + - Redundant type check now omitted when passing a literal None + to a function expecting an extension type. + [Patch by Sam Rushing ] + + - New-style classes now allowed as exceptions for compatibility + with Python 2.5 (inheritance from BaseException not currently + checked). + [Stefan Behnel ] + + - Sequence unpacking is now done using the iterator protocol + instead of indexing. + + - Allocation of an empty tuple is avoided when making a + Python call with no arguments. + [Stefan Behnel ] + + - Most warnings about unused variables and labels have been + eliminated. + + - Support for running the test suite on Linux added but not + yet fully tested. [Based in part on patch by Eric Wald + ]. + + - Makefile included for compiling the patched Carbon File module + used by the MacOSX test code. + +Modifications: + + - Type rules for enums tightened for compatibility with C++. + + - Direct assignment from float to int disallowed to prevent + C++ compilation warnings. + + - Hex literals left as hex in C code to avoid warnings from + the C compiler about decimal constants becoming unsigned. + +Bug fixes: + + - Exception raised during argument conversion could cause crash + due to uninitialised local variables. + [Konrad Hinsen ] + + - Assignment to a C attribute of an extension type from a + different type could generate C code with a pointer type + mismatch. [Atsuo Ishimoto ] + + - Backslash in a string literal before a non-special character + was not handled correctly. 
[Yuan Mang ] + + - Temporary vars used by del statement not being properly + released, sometimes leading to double decrefs. + [Jiba ] + + - A return statement whose expression raises an exception + inside a try-except that catches the exception could cause + a crash. [Anders Gustafsson ] + + - Fixed type compatibility checking problem between pointers + and arrays. [Lenard Lindstrom ] + + - Circular imports between modules defining extension types + caused unresolvable import order conflicts. + [Mike Wyatt ] + + - Cimporting multiple submodules from the same package caused + a redefined name error for the top level name. + [Martin Albrecht ] + + - Incorrect reference counting when assigning to an element of an + array that is a C attribute of an extension type. + [Igor Khavkine ] + + - Weak-referenceable extension types were not implemented + properly. [Chris Perkins , + Peter Johnson ] + + - Crash if C variable declared readonly outside an extension + type definition. [Eric Huss ] + +Doc updates: + + - Expanded discussion of the need for type declarations to enable + access to attributes of extension types. + + - Added a section "Source Files and Compilation" explaining the + rules for naming of source files of modules residing in packages, + and instructions for using the compiler and distutils extension. + + 0.9.4.1 ------- Modified: lxml/pyrex/Doc/extension_types.html ============================================================================== --- lxml/pyrex/Doc/extension_types.html (original) +++ lxml/pyrex/Doc/extension_types.html Wed Feb 14 15:20:32 2007 @@ -1,48 +1,148 @@ - - - Extension Types + + + + + + + + + + + + + + + Extension Types + + + -


Extension Types + + + +

+
Extension Types

-

Contents

-
    + + + +

    Contents

    + + + + + + + +

    Introduction

    + + As well as creating normal user-defined classes with the Python class statement, Pyrex also lets you create new built-in Python types, known as extension types. You define an extension type using the cdef class statement. Here's an example: -
    cdef class Shrubbery:

        cdef int width, height

    -

        def __init__(self, w, h):
    +

    cdef class Shrubbery: + +

        cdef int width, height

    + + + + +

        def __init__(self, w, h):
    + +         self.width = w
    + +         self.height = h

    -

        def describe(self):
    + + + + +

        def describe(self):
    + +         print "This shrubbery is", self.width, \
    + +             "by", self.height, "cubits."

    + +
    + + As you can see, a Pyrex extension type definition looks a lot like a Python class definition. Within it, you use the def statement to define methods that can be called from Python code. You can even define many of @@ -52,7 +152,12 @@ extension type), or they may be of any C data type. So you can use extension types to wrap arbitrary C data structures and provide a Python-like interface to them.

    -

    Attributes

    + + + +

    Attributes

    + + Attributes of an extension type are stored directly in the object's C struct. The set of attributes is fixed at compile time; you can't add attributes to an extension type instance at run time simply by assigning to them, as @@ -62,12 +167,24 @@ by Python attribute lookup, or by direct access to the C struct from Pyrex code. Python code is only able to access attributes of an extension type by the first method, but Pyrex code can use either method.

    -

    By default, extension type attributes are only accessible by direct access, + + + +

    By default, extension type attributes are only accessible by direct access, not Python access, which means that they are not accessible from Python code. To make them accessible from Python code, you need to declare them as public or readonly. For example,

    -
    cdef class Shrubbery:
    + + + +
    cdef class Shrubbery:
    + +     cdef public int width, height
    + +     cdef readonly float depth
    + + makes the width and height attributes readable and writable from Python code, and the depth attribute readable but not writable. @@ -76,21 +193,64 @@ although read-write exposure is only possible for generic Python attributes (of type object). If the attribute is declared to be of an extension type, it must be exposed readonly.

    -

    Note also that the public and readonly options apply + + + +

    Note also that the public and readonly options apply only to Python access, not direct access. All the attributes of an -extension type are always readable and writable by direct access.

    -

    Howerver, for direct access to be possible, the Pyrex compiler must know +extension type are always readable and writable by direct access.

    + +

    Type declarations

    + + + +

    Before you can directly access the attributes of an extension type, the Pyrex compiler must know that you have an instance of that type, and not just a generic Python object. It knows this already in the case of the "self" parameter of the methods of -that type, but in other cases you will have to tell it by means of a declaration. -For example,

    -
    cdef widen_shrubbery(Shrubbery sh, extra_width):
    +that type, but in other cases you will have to use a type declaration.

    + +

    For example, in the following function,

    + +
    cdef widen_shrubbery(sh, extra_width): # BAD
    + +     sh.width = sh.width + extra_width
    - If you attempt to access an extension type attribute through a generic -object reference, Pyrex will use a Python attribute lookup. If the attribute -is exposed for Python access (using public or readonly) -then this will work, but it will be much slower than direct access. + +

    because the sh parameter hasn't been given a type, the width +attribute will be accessed by a Python attribute lookup. If the +attribute has been declared public or readonly then this will work, but +it will be very inefficient. If the attribute is private, it will not work at all -- the +code will compile, but an attribute error will be raised at run time.

    + +

    The solution is to declare sh as being of type Shrubbery, as follows:

    + + + +
    cdef widen_shrubbery(Shrubbery sh, extra_width):
    + + +     sh.width = sh.width + extra_width
    + +Now the Pyrex compiler knows that sh has a C attribute called width and will generate code to access it directly and efficiently. The same consideration applies to local variables, for example,
    + +
    + +
    cdef Shrubbery another_shrubbery(Shrubbery sh1):
    + +    cdef Shrubbery sh2
    + +    sh2 = Shrubbery()
    + +    sh2.width = sh1.width
    + +    sh2.height = sh1.height
    + +    return sh2
    + +

    Extension types and None

    + + When you declare a parameter or C variable as being of an extension type, Pyrex will allow it to take on the value None as well as values of its declared type. This is analogous to the way a C pointer can take on the value NULL, @@ -103,223 +263,589 @@

    You need to be particularly careful when exposing Python functions which take extension types as arguments. If we wanted to make widen_shrubbery a Python function, for example, if we simply wrote

    -
    def widen_shrubbery(Shrubbery sh, extra_width): # This is
    + + + +
    def widen_shrubbery(Shrubbery sh, extra_width): # This is
    + +     sh.width = sh.width + extra_width           # dangerous!
    + + then users of our module could crash it by passing None for the sh parameter.

    One way to fix this would be

    -
    def widen_shrubbery(Shrubbery sh, extra_width):
    + + + +
    def widen_shrubbery(Shrubbery sh, extra_width):
    + +     if sh is None:
    + +         raise TypeError
    + +     sh.width = sh.width + extra_width
    + + but since this is anticipated to be such a frequent requirement, Pyrex provides a more convenient way. Parameters of a Python function declared as an extension type can have a not None clause:
    def widen_shrubbery(Shrubbery sh not None, extra_width):
    + +     sh.width = sh.width + extra_width
    + + Now the function will automatically check that sh is not None along with checking that it has the right type.

    Note, however, that the not None clause can only be used in Python functions (defined with def) and not C functions (defined with cdef). If you need to check whether a parameter to a C function is None, you will need to do it yourself.

    -

    Some more things to note:

    -
      + + + +

      Some more things to note:

      + + + +
        + +
      • The self parameter of a method of an extension type is guaranteed never to be None.
      • -
      -
        + + + +
      + + + +
        + +
      • When comparing a value with None, keep in mind that, if x is a Python object, x is None and x is not None are very efficient because they translate directly to C pointer comparisons, whereas x == None and x != None, or simply using x as a boolean value (as in if x: ...) will invoke Python operations and therefore be much slower.
      • -
      -

      Special methods

      + + + +
    + + + +

    Special methods

    + + Although the principles are similar, there are substantial differences between many of the __xxx__ special methods of extension types and their Python counterparts. There is a separate page devoted to this subject, and you should read it carefully before attempting to use any special methods in your extension types.

    Properties

    + + There is a special syntax for defining properties in an extension class: -
    cdef class Spam:

        property cheese:

    -

            "A doc string can go +

    cdef class Spam: + +

        property cheese:

    + + + + +

            "A doc string can go here."

    -

            def __get__(self): + + + + +

            def __get__(self):
    + +             # This is called when the property is read.
    + +             ...

    -

            def __set__(self, value): + + + + +

            def __set__(self, value):
    + +             # This is called when the property is written.
    + +             ...

    -

            def __del__(self): + + + + +

            def __del__(self):
    + +             # This is called when the property is deleted.
    + +  

    + +
    + + The __get__, __set__ and __del__ methods are all optional; if they are omitted, an exception will be raised when the corresponding operation is attempted.

    Here's a complete example. It defines a property which adds to a list each time it is written to, returns the list when it is read, and empties the list when it is deleted.
    + +  

    -
    + + + +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + -
    cheesy.pyxTest input
    cdef class CheeseShop: -

      cdef object cheeses

    -

      def __new__(self):
    + + +

      cdef object cheeses

    + + + + +

      def __new__(self):
    + +     self.cheeses = []

    -

      property cheese:

    -

        def __get__(self):
    + + + + +

      property cheese:

    + + + + +

        def __get__(self):
    + +       return "We don't have: %s" % self.cheeses

    -

        def __set__(self, value):
    + + + + +

        def __set__(self, value):
    + +       self.cheeses.append(value)

    -

        def __del__(self):
    + + + + +

        def __del__(self):
    + +       del self.cheeses[:]

    + +
    from cheesy import CheeseShop -

    shop = CheeseShop()
    + + +

    shop = CheeseShop()
    + + print shop.cheese

    -

    shop.cheese = "camembert"
    + + + + +

    shop.cheese = "camembert"
    + + print shop.cheese

    -

    shop.cheese = "cheddar"
    + + + + +

    shop.cheese = "cheddar"
    + + print shop.cheese

    -

    del shop.cheese
    + + + + +

    del shop.cheese
    + + print shop.cheese

    + +
    Test output
    We don't have: []
    + + We don't have: ['camembert']
    + + We don't have: ['camembert', 'cheddar']
    + + We don't have: []
    + + + + + + + +
    -

    Subclassing

    + + + +

    Subclassing

    + + An extension type may inherit from a built-in type or another extension type:
    cdef class Parrot:
    -     ...

    cdef class Norwegian(Parrot):
    + + +     ... + +

    cdef class Norwegian(Parrot):
    + +     ...

    + +
    -


    + + + +


    + + A complete definition of the base type must be available to Pyrex, so if the base type is a built-in type, it must have been previously declared as an extern extension type. If the base type is defined in another Pyrex module, it must either be declared as an extern extension type or imported using the cimport statement.

    -

    An extension type can only have one base class (no multiple inheritance). + + + +

    An extension type can only have one base class (no multiple inheritance).

    -

    Pyrex extension types can also be subclassed in Python. A Python class + + + +

    Pyrex extension types can also be subclassed in Python. A Python class can inherit from multiple extension types provided that the usual Python rules for multiple inheritance are followed (i.e. the C layouts of all the base classes must be compatible).
    + +

    -

    C methods

    + + + +

    C methods

    + + Extension types can have C methods as well as Python methods. Like C functions, C methods are declared using cdef instead of def. C methods are "virtual", and may be overridden in derived extension types.
    + +
    - + + + +
    + + + + + + + + + + + + + + + + + + -
    pets.pyx
    + +
    Output
    + +
    cdef class Parrot:
    + +
    + +   cdef void describe(self):
    + +     print "This parrot is resting."
    + +
    + + cdef class Norwegian(Parrot):
    + +
    + +   cdef void describe(self):
    + +     Parrot.describe(self)
    + +     print "Lovely plumage!"
    + +
    + +
    + + cdef Parrot p1, p2
    + + p1 = Parrot()
    + + p2 = Norwegian()
    + + print "p1:"
    + + p1.describe()
    + + print "p2:"
    + + p2.describe()

    + +
    p1:
    + + This parrot is resting.
    + + p2:
    + +
    This parrot is resting.
    + +
    Lovely plumage!
    + +
    + + + + + + + +
    + + The above example also illustrates that a C method can call an inherited C method using the usual Python technique, i.e.
    + +
    Parrot.describe(self)
    + +
    -

    Forward-declaring extension types

    + + + +

    Forward-declaring extension types

    + + Extension types can be forward-declared, like struct and union types. This will be necessary if you have two extension types that need to refer to each other, e.g. -
    cdef class Shrubbery # forward declaration

    cdef class Shrubber:
    +

    cdef class Shrubbery # forward declaration + +

    cdef class Shrubber:
    + +     cdef Shrubbery work_in_progress

    -

    cdef class Shrubbery:
    + + + + +

    cdef class Shrubbery:
    + +     cdef Shrubber creator

    + +
    + + If you are forward-declaring an extension type that has a base class, you must specify the base class in both the forward declaration and its subsequent definition, for example,
    + +
    cdef class A(B)
    + +
    + + ...
    + +
    + + cdef class A(B):
    + +     # attributes and methods

    + +
    -

    Making extension types weak-referenceable

    By + + + +

    Making extension types weak-referenceable

    + +By default, extension types do not support having weak references made to them. You can enable weak referencing by declaring a C attribute of type object called __weakref__. For example,
    + +
    + +
    cdef class ExplodingAnimal:
    + +     """This animal will self-destruct when it is
    + +        no longer strongly referenced."""
    + +    
    + +     cdef object __weakref__
    + +
    + +
    + +

    Public and external extension types

    + + Extension types can be declared extern or public. An extern extension type declaration makes an extension type defined in external C code available to a Pyrex module. A public extension type declaration makes an extension type defined in a Pyrex module available to external C code.

    External extension types

    + + An extern extension type allows you to gain access to the internals of Python objects defined in the Python core or in a non-Pyrex extension module. @@ -328,80 +854,191 @@ module. While you can still do that, Pyrex 0.8 and later provides a better mechanism for this. See Sharing C Declarations Between Pyrex Modules.
    + + Here is an example which will let you get at the C-level members of the built-in complex object. -
    cdef extern from "complexobject.h":

        struct Py_complex:
    +

    cdef extern from "complexobject.h": + +

        struct Py_complex:
    + +         double real
    + +         double imag

    -

        ctypedef class __builtin__.complex [object PyComplexObject]: + + + + +

        ctypedef class __builtin__.complex [object PyComplexObject]:
    + +         cdef Py_complex cval

    -

    # A function which uses the above type
    + + + + +

    # A function which uses the above type
    + + def spam(complex c):
    + +     print "Real:", c.cval.real
    + +     print "Imag:", c.cval.imag

    + +
    + + Some important things to note are:
      + +
    1. In this example, ctypedef class has been used. This is because, in the Python header files, the PyComplexObject struct is declared with
      + +
      + + + +
      ctypedef struct {
      + +     ...
      + + } PyComplexObject;
      + +
      + +
      -
    2. As well as the name of the extension type, the module in which + + +
    3. + +
    4. As well as the name of the extension type, the module in which its type object can be found is also specified. See the implicit importing section below. 
      + +
      + +
    5. -
    6. When declaring an external extension type, you don't declare + + +
    7. When declaring an external extension type, you don't declare any methods. Declaration of methods is not required in order to call them, because the calls are Python method calls. Also, as with structs and unions, if your extension class declaration is inside a cdef extern from block, you only need to declare those C members which you wish to access.
    8. -
    -

    Implicit importing

    -
    Backwards Incompatibility Note: + + + + + + + +

    Implicit importing

    + + + +
    Backwards Incompatibility Note: You will have to update any pre-0.8 Pyrex modules you have which use extern extension types. I apologise for this, but for complicated reasons it proved to be too difficult to continue supporting the old way of doing these while introducing the new features that I wanted.
    + + Pyrex 0.8 and later requires you to include a module name in an extern extension class declaration, for example,
    cdef extern class MyModule.Spam:
    + +     ...
    + + The type object will be implicitly imported from the specified module and bound to the corresponding name in this module. In other words, in this example an implicit
      -
      from MyModule import Spam
      -
    + + + + +
    from MyModule import Spam
    + + + + + + statement will be executed at module load time.

    The module name can be a dotted name to refer to a module inside a package hierarchy, for example,

    -
    cdef extern class My.Nested.Package.Spam:
    + + + +
    cdef extern class My.Nested.Package.Spam:
    + +     ...
    + + You can also specify an alternative name under which to import the type using an as clause, for example,
      + + cdef extern class My.Nested.Package.Spam as Yummy:
      -    ...
    + + +    ... + + + which corresponds to the implicit import statement
      -
      from My.Nested.Package import Spam as Yummy
      -
    -

    Type names vs. constructor names

    + + + + +
    from My.Nested.Package import Spam as Yummy
    + + + + + + + +

    Type names vs. constructor names

    + + Inside a Pyrex module, the name of an extension type serves two distinct purposes. When used in an expression, it refers to a module-level global variable holding the type's constructor (i.e. its type-object). However, it can also be used as a C type name to declare variables, arguments and return values of that type.

    When you declare

    -
    cdef extern class MyModule.Spam:
    + + + +
    cdef extern class MyModule.Spam:
    + +     ...
    + + the name Spam serves both these roles. There may be other names by which you can refer to the constructor, but only Spam can be used as a type name. For example, if you were to explicitly import MyModule, @@ -409,21 +1046,34 @@ wouldn't be able to use MyModule.Spam as a type name.

    When an as clause is used, the name specified in the as clause also takes over both roles. So if you declare

    -
    cdef extern class MyModule.Spam as Yummy:
    + + + +
    cdef extern class MyModule.Spam as Yummy:
    + +     ...
    + + then Yummy becomes both the type name and a name for the constructor. Again, there are other ways that you could get hold of the constructor, but only Yummy is usable as a type name.

    Public extension types

    + + An extension type can be declared public, in which case a .h file is generated containing declarations for its object struct and type object. By including the .h file in external C code that you write, that code can access the attributes of the extension type.

    Name specification clause

    + + The part of the class declaration in square brackets is a special feature only available for extern or public extension types. The full form of this clause is
    [object object_struct_name, type type_object_name ]
    + + where object_struct_name is the name to assume for the type's C struct, and type_object_name is the name to assume for the type's statically declared type object. (The object and type clauses can be written @@ -433,12 +1083,30 @@ generate code that is compatible with the declarations in the header file. Otherwise, for extern extension types, the object clause is optional.

    -

    For public extension types, the object and type clauses + + + +

    For public extension types, the object and type clauses are both required, because Pyrex must be able to generate code that is compatible with external C code.

    -

    -

    + + + +

    + + + +

    + + Back to the Language Overview
    + +  
    + +
    - \ No newline at end of file + + + + Modified: lxml/pyrex/Doc/overview.html ============================================================================== --- lxml/pyrex/Doc/overview.html (original) +++ lxml/pyrex/Doc/overview.html Wed Feb 14 15:20:32 2007 @@ -1,108 +1,216 @@ + + + - - Pyrex Language Overview - + + Pyrex Language Overview + + -


    Overview of the Pyrex Language 

    +

    +
    Overview of the Pyrex Language  +

    + This document informally describes the extensions to the Python language made by Pyrex. Some day there will be a reference manual covering everything - in more detail.
    + in more detail. 

    Contents

    -   -

    Contents

    + -


    Basics +


    +

    Source Files and Compilation
    +

    +

    +Pyrex source file names consist of the name of the module followed by a .pyx extension, for example a module called primes would have a source file named primes.pyx.
    + +
    - This section describes the basic features of the Pyrex language. The facilities +If your module is destined to live in a package, the source file name should include the full dotted name that the module will eventually have. For example, a module called primes that will be installed in a package called numbers should have a source file called numbers.primes.pyx. +This will ensure that the __name__ properties of the module and any +classes defined in it are set correctly. If you don't do this, you may +find that pickling doesn't work, among other problems. It also ensures +that the Pyrex compiler has the right idea about the layout of the +module namespace, which can be important when accessing extension types +defined in other modules.
    +
    +Once you have written your .pyx file, there are a couple of ways of +turning it into an extension module. One way is to compile it manually +with the Pyrex compiler, e.g.
    +
    +
    pyrexc primes.pyx
    +
    +
    +This will produce a file called primes.c, +which then needs to be compiled with the C compiler using whatever +options are appropriate on your platform for generating an extension +module. There's a Makefile in the Demos directory (called Makefile.nodistutils) that shows how to do this for Linux.
    +
    +The other, and probably better, way is to use the distutils extension provided with Pyrex. See the Setup.py +file in the Demos directory for an example of how to use it. This +method has the advantage of being cross-platform -- the same setup file +should work on any platform where distutils can compile an extension +module.
    +
    +
    +

    Language Basics +

    +This section describes the basic features of the Pyrex language. The facilities covered in this section allow you to create Python-callable functions that manipulate C data structures and convert between Python and C data types. - Later sections will cover facilities for wrapping external C code, creating new Python types and cooperation between Pyrex modules. + Later sections will cover facilities for wrapping external C code, creating new Python types and cooperation between Pyrex modules.
    +

    Python functions vs. C functions

    + There are two kinds of function definition in Pyrex:

    Python functions are defined using the def statement, as in Python. They take Python objects as parameters and return Python objects.

    +

    C functions are defined using the new cdef statement. They take either Python objects or C values as parameters, and can return either Python objects or C values.

    +

    Within a Pyrex module, Python functions and C functions can call each other freely, but only Python functions can be called from outside the module by interpreted Python code. So, any functions that you want to "export" from your Pyrex module must be declared as Python functions using def.

    +

    Parameters of either type of function can be declared to have C data types, using normal C declaration syntax. For example,

    + -
    def spam(int i, char *s):
        ...
    -
    cdef int eggs(unsigned long l, float f):
        ...
    +
    +
    def spam(int i, char *s):
        ...
    + + +
    cdef int eggs(unsigned long l, float f):
        ...
    +
    + When a parameter of a Python function is declared to have a C data type, it is passed in as a Python object and automatically converted to a C value, if possible. Automatic conversion is currently only possible for numeric @@ -111,17 +219,22 @@

    C functions, on the other hand, can have parameters of any type, since they're passed in directly using a normal C function call.

    +

    Python objects as parameters and return values

    + If no type is specified for a parameter or return value, it is assumed to be a Python object. (Note that this is different from the C convention, where it would default to int.) For example, the following defines a C function that takes two Python objects as parameters and returns a Python object: -
    cdef spamobjs(x, y):
        ...
    +
    +
    cdef spamobjs(x, y):
        ...
    +
    + Reference counting for these objects is performed automatically according to the standard Python/C API rules (i.e. borrowed references are taken as parameters and a new reference is returned). @@ -129,126 +242,217 @@ as a Python object. This can be useful if the name being declared would otherwise be taken as the name of a type, for example,

    + -
    cdef ftang(object int):
        ...
    +
    +
    cdef ftang(object int):
        ...
    +
    + declares a parameter called int which is a Python object. You can also use object as the explicit return type of a function, e.g. -
    cdef object ftang(object int):
        ...
    +
    +
    cdef object ftang(object int):
        ...
    +
    + In the interests of clarity, it is probably a good idea to always be explicit about object parameters in C functions.

    C variable and type definitions

    + The cdef statement is also used to declare C variables, either local or module-level: -
    cdef int i, j, k
    cdef float f, g[42], *h
    +
    +
    cdef int i, j, k
    cdef float f, g[42], *h
    +
    + and C struct, union or enum types: -
    cdef struct Grail:
        int age
        float volume
    -
    cdef union Food:
        char *spam
        float *eggs
    -
    cdef enum CheeseType:
        cheddar, edam, 
        camembert
    -
    cdef enum CheeseState:
        hard = 1
        soft = 2
        runny = 3
    +
    +
    cdef struct Grail:
        int age
        float volume
    + + +
    cdef union Food:
        char *spam
        float *eggs
    + + +
    cdef enum CheeseType:
        cheddar, edam, 
        camembert
    + + +
    cdef enum CheeseState:
        hard = 1
        soft = 2
        runny = 3
    +
    + There is currently no special syntax for defining a constant, but you can use an anonymous enum declaration for this purpose, for example,
    cdef enum:
    +     tons_of_spam = 3
    + Note that the words struct, union and enum are used only when defining a type, not when referring to it. For example, to declare a variable pointing to a Grail you would write -
    cdef Grail *gp
    +
    +
    cdef Grail *gp
    +
    + and not -
    cdef struct Grail *gp # WRONG
    +
    +
    cdef struct Grail *gp # WRONG
    +
    + There is also a ctypedef statement for giving names to types, e.g. -
    ctypedef unsigned long ULong
    -
    ctypedef int *IntPtr
    +
    +
    ctypedef unsigned long ULong
    + + +
    ctypedef int *IntPtr
    +
    +

    Automatic type conversions

    + In most situations, automatic conversions will be performed for the basic numeric and string types when a Python object is used in a context requiring a C value, or vice versa. The following table summarises the conversion possibilities.
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    C types
    +
    From Python types
    +
    To Python types
    +
    [unsigned] char
    + [unsigned] short
    + int, long
    int, long
    +
    int
    +
    unsigned int
    + unsigned long
    + [unsigned] long long
    +
    int, long
    +
    +
    long
    +
    +
    float, double, long double
    +
    int, long, float
    +
    float
    +
    char *
    +
    str
    +
    str
    +
    +
    +

    Caveats when using a Python string in a C context

    + You need to be careful when using a Python string in a context expecting a char *. In this situation, a pointer to the contents of the Python string is used, which is only valid as long as the Python string exists. So you @@ -257,12 +461,16 @@ the Python string will live long enough, you will need to copy the C string.
    +
    + Pyrex detects and prevents some mistakes of this kind. For instance, if you attempt something like
    +
    cdef char *s
    s = pystring1 + pystring2
    + then Pyrex will produce the error message "Obtaining char * from temporary Python value". The reason is that concatenating the two Python strings produces a new Python string object that is referenced only by a temporary internal @@ -270,28 +478,37 @@ the temporary variable will be decrefed and the Python string deallocated, leaving s dangling. Since this code could not possibly work, Pyrex refuses to compile it.
    +
    + The solution is to assign the result of the concatenation to a Python variable, and then obtain the char * from that, i.e.
    +
    cdef char *s
    p = pystring1 + pystring2
    s = p
    + It is then your responsibility to hold the reference p for as long as necessary.
    +
    + Keep in mind that the rules used to detect such errors are only heuristics. Sometimes Pyrex will complain unnecessarily, and sometimes it will fail to detect a problem that exists. Ultimately, you need to understand the issue and be careful what you do.
    +
    +

    Scope rules

    + Pyrex determines whether a variable belongs to a local scope, the module scope, or the built-in scope completely statically. As with Python, assigning to a variable which is not otherwise declared implicitly declares @@ -304,35 +521,49 @@ declare a name to be a module-level name when there would otherwise not be any indication of this, for example,

    +
    global __name__
    + print __name__
    + Without the global statement, the above would print the name of the builtins module.
    +
    + Note: A consequence of these rules is that the module-level scope behaves the same way as a Python local scope if you refer to a variable before assigning to it. In particular, tricks such as the following will not work in Pyrex:
    + -
    try:
      x = True
    except NameError:
      True = 1
    +
    +
    try:
      x = True
    except NameError:
      True = 1
    +
    + because, due to the assignment, the True will always be looked up in the module-level scope. You would have to do something like this instead:
    + -
    import __builtin__
    try:
    True = __builtin__.True
    except AttributeError:
    True = 1
    +
    +
    import __builtin__
    try:
    True = __builtin__.True
    except AttributeError:
    True = 1
    +
    +

    Statements and expressions

    + Control structures and expressions follow Python syntax for the most part. When applied to Python objects, they have the same semantics as in Python (unless otherwise noted). Most of the Python operators can also be applied @@ -341,87 +572,128 @@ are performed automatically between Python objects and C numeric or string types.

    +

    Reference counts are maintained automatically for all Python objects, and all Python operations are automatically checked for errors, with appropriate action taken.

    +

    Differences between C and Pyrex expressions

    + There are some differences in syntax and semantics between C expressions and Pyrex expressions, particularly in the area of C constructs which have no direct equivalent in Python.
    +
      -
    • An integer literal without an L suffix is treated as a C constant, and will be truncated to whatever size your C compiler thinks appropriate. With an L suffix, it will be converted to Python long integer (even if it would be small enough to fit into a C int).
      + +
    • An integer literal without an L suffix is treated as a C constant, and will be truncated to whatever size your C compiler thinks appropriate. With an L suffix, it will be converted to Python long integer (even if it would be small enough to fit into a C int).
      +
      +
    • -
    • There is no -> operator in Pyrex. Instead of p->x, + +
    • There is no -> operator in Pyrex. Instead of p->x, use p.x
    • +  
    • There is no * operator in Pyrex. Instead of *p, use p[0]
    • +  
    • There is an & operator, with the same semantics as in C.
    • +  
    • The null C pointer is called NULL, not 0 (and NULL is a reserved word).
    • +  
    • Character literals are written with a c prefix, for example:
    • -
        -
        c'X'
        -
      + + +
        + + +
        c'X'
        + + +
      +
    • Type casts are written <type>value , for example:
    • -
        -
        cdef char *p, float *q
        p = <char*>q
        -
      + + +
        + + +
        cdef char *p, float *q
        p = <char*>q
        + + +
      + Warning: Don't attempt to use a typecast to convert between Python and C data types -- it won't do the right thing. Leave Pyrex to perform the conversion automatically.
    +

    Integer for-loops

    + You should be aware that a for-loop such as
    for i in range(n):
    +     ...
    + won't be very fast, even if i and n are declared as C integers, because range is a Python function. For iterating over ranges of integers, Pyrex has another form of for-loop:
    for i from 0 <= i < n:
    +     ...
    + If the loop variable and the lower and upper bounds are all C integers, this form of loop will be much faster, because Pyrex will translate it into pure C code.

    Some things to note about the for-from loop:

    +
      +
    • The target expression must be a variable name.
    • +
    • The name between the lower and upper bounds must be the same as the target name.
    • +
    • The direction of iteration is determined by the relations. If they are both from the set {<, <=} then it is upwards; if they are both from the set {>, >=} then it is downwards. (Any other combination is disallowed.)
    • +
    + Like other Python looping statements, break and continue may be used in the body, and the loop may have an else clause. -


    +

    +

    +

    Error return values

    + If you don't do anything special, a function declared with cdef that does not return a Python object has no way of reporting Python exceptions to its caller. If an exception is detected in such a function, a warning message is printed and the exception is ignored. @@ -429,10 +701,13 @@ to propagate exceptions to its caller, you need to declare an exception value for it. Here is an example:

    +
    cdef int spam() except -1:
    +     ...
    + With this declaration, whenever an exception occurs inside spam, it will immediately return with the value -1. Furthermore, whenever a call to spam returns -1, an exception will be assumed @@ -442,77 +717,107 @@ reserve one entirely for signalling errors, you can use an alternative form of exception value declaration:

    +
    cdef int spam() except? -1:
    +     ...
    - The "?" indicates that the value -1 only indicates a possible error. In this case, Pyrex generates a call to PyErr_Occurredif the + + The "?" indicates that the value -1 only indicates a possible error. In this case, Pyrex generates a call to PyErr_Occurred if the exception value is returned, to make sure it really is an error.

    There is also a third form of exception value declaration:

    +
    cdef int spam() except *:
    +     ...
    + This form causes Pyrex to generate a call to PyErr_Occurred after - every call to spam, regardless of what value it returns. If you have + every call to spam, regardless of what value it returns. If you have a function returning void that needs to propagate errors, you will have to use this form, since there isn't any return value to test.

    Some things to note:

    +
      -
    • Currently, exception values can only declared for functions returning - an integer, float or pointer type, and the value must be a literal, - not an expression (although it can be negative). The only possible pointer + +
    • Exception values can only be declared for functions returning + an integer, enum, float or pointer type, and the value must be a constant expression. The only possible pointer exception value is NULL. Void functions can only use the except * form.
    • -
      + +  
    • The exception value specification is part of the signature of the function. If you're passing a pointer to a function as a parameter or assigning it to a variable, the declared type of the parameter or variable must have the same exception value specification (or lack thereof). Here is an example of a pointer-to-function declaration with an exception value:
    • -
        -
        int (*grail)(int, char *) except -1
        -
      + + +
        + + +
        int (*grail)(int, char *) except -1
        + + +
      +
    • You don't need to (and shouldn't) declare exception values for functions which return Python objects. Remember that a function with no declared return type implicitly returns a Python object.
    • +
    +

    Checking return values of non-Pyrex functions

    + It's important to understand that the except clause does not cause an error to be raised when the specified value is returned. For example, you can't write something like -
    cdef extern FILE *fopen(char *filename, char *mode) except NULL # WRONG!
    +
    +
    cdef extern FILE *fopen(char *filename, char *mode) except NULL # WRONG!
    +
    + and expect an exception to be automatically raised if a call to fopen returns NULL. The except clause doesn't work that way; its only purpose is for propagating exceptions that have already been raised, either by a Pyrex function or a C function that calls Python/C API routines. To get an exception from a non-Python-aware function such as fopen, you will have to check the return value and raise it yourself, for example, -
    cdef FILE *p
    p = fopen("spam.txt", "r")
    if p == NULL:
        raise SpamError("Couldn't open the spam file")
    +
    +
    cdef FILE *p
    p = fopen("spam.txt", "r")
    if p == NULL:
        raise SpamError("Couldn't open the spam file")
    +
    + -


    +

    +

    +

    The include statement

    + For convenience, a large Pyrex module can be split up into a number of files which are put together using the include statement, for example -
    include "spamstuff.pxi"
    +
    +
    include "spamstuff.pxi"
    +
    + The contents of the named file are textually included at that point. The included file can contain any complete top-level Pyrex statements, including other include statements. The include statement itself can @@ -524,11 +829,14 @@ and it is expected that use of the include statement for this purpose will be phased out altogether in future versions.

    + -


    Interfacing with External +

    +
    Interfacing with External C Code

    + One of the main uses of Pyrex is wrapping existing libraries of C code. This is achieved by using external declarations to declare the C functions and variables from the library that you want to use. @@ -540,23 +848,32 @@ allow Python code to call C code, it can also be used to allow C code to call Python code.

    +

    External declarations

    + By default, C functions and variables declared at the module level are local to the module (i.e. they have the C static storage class). They can also be declared extern to specify that they are defined elsewhere, for example: -
    cdef extern int spam_counter
    -
    cdef extern void order_spam(int tons)
    +
    +
    cdef extern int spam_counter
    + + +
    cdef extern void order_spam(int tons)
    +
    +
    +

    Referencing C header files

    + When you use an extern definition on its own as in the examples above, Pyrex includes a declaration for it in the generated C file. This can cause problems if the declaration doesn't exactly match the declaration that will @@ -566,26 +883,41 @@

    To achieve this, you can tell Pyrex that the declarations are to be found in a C header file, like this:

    + -
    cdef extern from "spam.h":
    -
        int spam_counter
    -
        void order_spam(int tons)
    +
    +
    cdef extern from "spam.h":
    + + +
        int spam_counter
    + + +
        void order_spam(int tons)
    +
    + The cdef extern from clause does three things:
      +
    1. It directs Pyrex to place a #include statement for the named header file in the generated C code.
      +
    2. +  
    3. It prevents Pyrex from generating any C code for the declarations found in the associated block.
      +
    4. +  
    5. It treats all declarations within the block as though they started with cdef extern.
    6. +
    + It's important to understand that Pyrex does not itself read the C header file, so you still need to provide Pyrex versions of any declarations from it that you use. However, the Pyrex declarations don't always have to @@ -593,83 +925,129 @@
      +
    1. Don't use const. Pyrex doesn't know anything about const, so just leave it out. Most of the time this shouldn't cause any problem, although on rare occasions you might have to use a cast. 1
      +
    2. +  
    3. Leave out any platform-specific extensions to C declarations such as __declspec().
      +
    4. +  
    5. If the header file declares a big struct and you only want to use a few members, you only need to declare the members you're interested in. Leaving the rest out doesn't do any harm, because the C compiler will use the full definition from the header file.
      +
      + In some cases, you might not need any of the struct's members, in which case you can just put pass in the body of the struct declaration, e.g.
      +
      +     cdef extern from "foo.h":
      +         struct spam:
      +             pass

      +
      + Note that you can only do this inside a cdef extern from block; struct declarations anywhere else must be non-empty.
      +
      +
    6. +
    7. If the header file uses typedef names such as size_t to refer to platform-dependent flavours of numeric types, you will need a corresponding ctypedef statement, but you don't need to match the type exactly, just use something of the right general kind (int, float, etc). For example,
    8. -
        -
        ctypedef int size_t
        -
      + + +
        + + +
        ctypedef int size_t
        + + +
      + will work okay whatever the actual size of a size_t is (provided the header file defines it correctly).
      +  
    9. If the header file uses macros to define constants, translate them into a dummy enum declaration.
      +
    10. +  
    11. If the header file defines a function using a macro, declare it as though it were an ordinary function, with appropriate argument and result types.
    12. +
    + A few more tricks and tips:
      +
    • If you want to include a C header because it's needed by another header, but don't want to use any declarations from it, put pass in the extern-from block:
    • +
    +
      -
        + + +
          + cdef extern from "spam.h":
          -     pass
        + +     pass +
      +
    +
      +
    • If you want to include some external declarations, but don't want to specify a header file (because it's included by some other header that you've already included) you can put * in place of the header file name:
    • +
    + -
    cdef extern from *:
    +
    +
    cdef extern from *:
    +     ...
    +
    +

    Styles of struct, union and enum declaration

    + There are two main ways that structs, unions and enums can be declared in C header files: using a tag name, or using a typedef. There are also some variations based on various combinations of these. @@ -680,93 +1058,158 @@ above corresponds to the use of a tag name. To get the other style, you prefix the declaration with ctypedef, as illustrated below.

    +

    The following table shows the various possible styles that can be found in a header file, and the corresponding Pyrex declaration that you should put in the cdef extern from block. Struct declarations are used as an example; the same applies equally to union and enum declarations.

    +

    Note that in all the cases below, you refer to the type in Pyrex code simply as Foo, not struct Foo.
    -   + +   +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + -
     C codePossibilities for corresponding Pyrex codeComments
    1struct Foo {
    +   ...
    + };
    cdef struct Foo:
    +   ...
    Pyrex will refer to the type as struct Foo in the generated C code.
    2typedef struct {
    +   ...
    + } Foo;
    ctypedef struct Foo:
    +   ...
    Pyrex will refer to the type simply as Foo in the generated C code.
    3typedef struct foo {
    +   ...
    + } Foo;
    cdef struct foo:
    +   ...
    + ctypedef foo Foo #optional
    If the C header uses both a tag and a typedef with different names, you can use either form of declaration in Pyrex (although if you need to forward reference the type, you'll have to use the first form).
    ctypedef struct Foo:
    +   ...
    4typedef struct Foo {
    +   ...
    + } Foo;
    cdef struct Foo:
    +   ...
    If the header uses the same name for the tag and the typedef, you won't be able to include a ctypedef for it -- but then, it's not necessary.
    + + + + +

    +

    Accessing Python/C API routines

    + One particular use of the cdef extern from statement is for gaining access to routines in the Python/C API. For example, -
    cdef extern from "Python.h":
    -
        object PyString_FromStringAndSize(char *s, int len)
    +
    +
    cdef extern from "Python.h":
    + + +
        object PyString_FromStringAndSize(char *s, int len)
    +
    + will allow you to create Python strings containing null bytes.

    +

    Resolving naming conflicts - C name specifications

    + Each Pyrex module has a single module-level namespace for both Python and C names. This can be inconvenient if you want to wrap some external C functions and provide the Python user with Python functions of the same @@ -777,22 +1220,30 @@ the facilities described in the section on sharing declarations between Pyrex modules.

    +

    The other way is to use a c name specification to give different Pyrex and C names to the C function. Suppose, for example, that you want to wrap an external function called eject_tomato. If you declare it as

    + -
    cdef extern void c_eject_tomato "eject_tomato" (float speed)
    +
    +
    cdef extern void c_eject_tomato "eject_tomato" (float speed)
    +
    + then its name inside the Pyrex module will be c_eject_tomato, whereas its name in C will be eject_tomato. You can then wrap it with -
    def eject_tomato(speed):
      c_eject_tomato(speed)
    +
    +
    def eject_tomato(speed):
      c_eject_tomato(speed)
    +
    + so that users of your module can refer to it as eject_tomato.

    Another use for this feature is referring to external names that happen @@ -800,26 +1251,38 @@ called print, you can rename it to something else in your Pyrex module.

    +

    As well as functions, C names can be specified for variables, structs, unions, enums, struct and union members, and enum values. For example,

    + -
    cdef extern int one "ein", two "zwei"
    cdef extern float three "drei"

    cdef struct spam "SPAM":
      int i "eye"
    +
    +
    cdef extern int one "ein", two "zwei"
    cdef extern float three "drei"

    cdef struct spam "SPAM":
      int i "eye"
    + cdef enum surprise "inquisition":
    +   first "alpha"
    +   second "beta" = 3
    +

    Public Declarations

    + You can make C variables and functions defined in a Pyrex module accessible to external C code (or another Pyrex module) using the public keyword, as follows: -
    cdef public int spam # public variable declaration

    cdef public void grail(int num_nuns): # public function declaration
    +

    cdef public int spam # public variable declaration +

    cdef public void grail(int num_nuns): # public function declaration
    +     ...

    +
    + If there are any public declarations in a Pyrex module, a .h file is generated containing equivalent C declarations for inclusion in other C code.

    Pyrex also generates a .pxi file containing Pyrex versions of the @@ -829,122 +1292,191 @@ the dynamic linker at run time. I haven't tested this, so I can't say how well it will work on the various platforms.

    +
    NOTE: If all you want to export is an extension type, there is now a better way -- see Sharing Declarations Between Pyrex Modules.
    + -


    Extension Types +

    +
    Extension Types

    + One of the most powerful features of Pyrex is the ability to easily create new built-in Python types, called extension types. This is a major topic in itself, so there is a  separate page devoted to it. -


    Sharing Declarations Between Pyrex Modules +

    +
    Sharing Declarations Between Pyrex Modules

    + Pyrex 0.8 introduces a substantial new set of facilities allowing a Pyrex module to easily import and use C declarations and extension types from another Pyrex module. You can now create a set of co-operating Pyrex modules just as easily as you can create a set of co-operating Python modules. There is a separate page devoted to this topic. -


    Limitations +

    +
    Limitations

    +

    Unsupported Python features

    + Pyrex is not quite a full superset of Python. The following restrictions apply:
  • Function definitions (whether using def or cdef) cannot be nested within other function definitions.
    +
  • +  
  • Class definitions can only appear at the top level of a module, not inside a function.
    +
  • +  
  • The import * form of import is not allowed anywhere (other forms of the import statement are fine, though).
    +
  • +  
  • Generators cannot be defined in Pyrex.
    +
    +
  • +
  • The globals() and locals() functions cannot be used.
  • +
    + The above restrictions will most likely remain, since removing them would be difficult and they're not really needed for Pyrex's intended applications.

    There are also some temporary limitations, which may eventually be lifted, including:

    +
  • Class and function definitions cannot be placed inside control structures.
    +
  • +  
  • In-place arithmetic operators (+=, etc) are not yet supported.
    +
  • +  
  • List comprehensions are not yet supported.
    +
  • +  
  • There is no support for Unicode.
    +
  • +  
  • Special methods of extension types cannot have functioning docstrings.
    +
    +
  • +
  • The use of string literals as comments is not recommended at present, because Pyrex doesn't optimize them away, and won't even accept them in places where executable statements are not allowed.
  • +
    +

    Semantic differences between Python and Pyrex

    +

    Behaviour of class scopes

    + In Python, referring to a method of a class inside the class definition, i.e. while the class is being defined, yields a plain function object, but in Pyrex it yields an unbound method2. A consequence of this is that the usual idiom for using the classmethod and staticmethod functions, e.g. -
    class Spam:
    -
      def method(cls):
        ...
    -
      method = classmethod(method)
    +
    +
    class Spam:
    + + +
      def method(cls):
        ...
    + + +
      method = classmethod(method)
    +
    + will not work in Pyrex. This can be worked around by defining the function outside the class, and then assigning the result of classmethod or staticmethod inside the class, i.e. -
    def Spam_method(cls):
      ...
    -
    class Spam:
    -
      method = classmethod(Spam_method)
    +
    +
    def Spam_method(cls):
      ...
    + + +
    class Spam:
    + + +
      method = classmethod(Spam_method)
    +
    + -


    Footnotes

    +

    +
    Footnotes +

    + 1. A problem with const could arise if you have something like -
    cdef extern from "grail.h":
      char *nun
    +
    +
    cdef extern from "grail.h":
      char *nun
    +
    + where grail.h actually contains -
    extern const char *nun;
    +
    +
    extern const char *nun;
    +
    + and you do -
    cdef void languissement(char *s):
      #something that doesn't change s
    -
    ...
    -
    languissement(nun)
    +
    +
    cdef void languissement(char *s):
      #something that doesn't change s
    + + +
    ...
    + + +
    languissement(nun)
    +
    + which will cause the C compiler to complain. You can work around it by casting away the constness: -
    languissement(<char *>nun)
    +
    +
    languissement(<char *>nun)
    +
    +
    2. The reason for the different behaviour of class scopes is that Pyrex-defined Python functions are PyCFunction objects, @@ -953,8 +1485,12 @@ around this, Pyrex wraps each method in an unbound method object itself before storing it in the class's dictionary.
    +  
    +
    - \ No newline at end of file + + + \ No newline at end of file Modified: lxml/pyrex/Doc/sharing.html ============================================================================== --- lxml/pyrex/Doc/sharing.html (original) +++ lxml/pyrex/Doc/sharing.html Wed Feb 14 15:20:32 2007 @@ -1,106 +1,192 @@ - - - Sharing Declarations Between Pyrex Modules + + + + + + + + + Sharing Declarations Between Pyrex Modules + -


    Sharing Declarations Between Pyrex Modules + + +

    +
    Sharing Declarations Between Pyrex Modules

    + This section describes a new set of facilities introduced in Pyrex 0.8 for making C declarations and extension types in one Pyrex module available for use in another Pyrex module. These facilities are closely modelled on the Python import mechanism, and can be thought of as a compile-time version of it.

    Contents

    - + It cannot currently contain any non-extern C function or variable declarations (although this may be possible in a future version).

    It cannot contain the implementations of any C or Python functions, or any Python class definitions, or any executable statements.

    -
    NOTE: You don't need to (and shouldn't) declare anything in a + + +
    NOTE: You don't need to (and shouldn't) declare anything in a declaration file public in order to make it available to other Pyrex modules; its mere presence in a definition file does that. You only need a public declaration if you want to make something available to external C code.
    -

    What an Implementation File contains

    + + +

    What an Implementation File contains

    + An implementation file can contain any kind of Pyrex statement, although there are some restrictions on the implementation part of an extension type if the corresponding definition file also defines that type (see below).

    The cimport statement

    + The cimport statement is used in a definition or implementation file to gain access to names declared in another definition file. Its syntax exactly parallels that of the normal Python import statement:
    cimport module [, module...]
    -
    from module cimport name + + +
    from module cimport name [as name] [, name [as name] ...]
    + Here is an example. The file on the left is a definition file which exports a C data type. The file on the right is an implementation file which imports and uses it.
    -   + +   +
    + + + + + + + + + -
    dishes.pxdrestaurant.pyx
    cdef enum otherstuff:
    -     sausage, eggs, lettuce

    cdef struct spamdish:
    + +     sausage, eggs, lettuce +

    cdef struct spamdish:
    +     int oz_of_spam
    +     otherstuff filler

    +
    cimport dishes
    - from dishes cimport spamdish

    cdef void prepare(spamdish *d):
    + + from dishes cimport spamdish +

    cdef void prepare(spamdish *d):
    +     d.oz_of_spam = 42
    +     d.filler = dishes.sausage

    -

    def serve():
    + + +

    def serve():
    +     cdef spamdish d
    +     prepare(&d)
    +     print "%d oz spam, filler no. %d" % \
    +          (d.oz_of_spam, d.filler)

    +
    -

    It is important to understand that the cimport statement can only + + + + + + +

    It is important to understand that the cimport statement can only be used to import C data types, external C functions and variables, and extension types. It cannot be used to import any Python objects, and (with one exception) it doesn't imply any Python import at run time. If you want to refer to any Python names from a module that you have cimported, you will have to include a regular import statement for it as well.

    -

    The exception is that when you use cimport to import an extension + + +

    The exception is that when you use cimport to import an extension type, its type object is imported at run time and made available by the name under which you imported it. Using cimport to import extension types is covered in more detail below.

    -

    Search paths for definition files

    + + +

    Search paths for definition files

    + When you cimport a module called modulename, the Pyrex compiler searches for a file called modulename.pxd along the search path for include files, as specified by -I command line options. @@ -108,94 +194,172 @@ definition file modulename.pxd is first searched for along the same path, and if found, it is processed before processing the .pyx file.

    -

    Using cimport to resolve naming + + +

    Using cimport to resolve naming conflicts

    + The cimport mechanism provides a clean and simple way to solve the problem of wrapping external C functions with Python functions of the same name. All you need to do is put the extern C declarations into a .pxd file for an imaginary module, and cimport that module. You can then refer to the C functions by qualifying them with the name of the module. Here's an example:
    -   + +   +
    + + + + + + + - + -
    c_lunch.pxdlunch.pyx
    cdef extern from "lunch.h":
    +     void eject_tomato(float)
    cimport c_lunch

    def eject_tomato(float speed):
    + +

    cimport c_lunch +

    def eject_tomato(float speed):
    +     c_lunch.eject_tomato(speed)

    +
    -

    You don't need any c_lunch.pyx file, because the only things + + + + + + +

    You don't need any c_lunch.pyx file, because the only things defined in c_lunch.pxd are extern C entities. There won't be any actual c_lunch module at run time, but that doesn't matter -- c_lunch has done its job of providing an additional namespace at compile time.

    -

    Sharing Extension Types

    + + +

    Sharing Extension Types

    + An extension type declaration can also be split into two parts, one in a definition file and the other in the corresponding implementation file.
    +
    + The definition part of the extension type can only declare C attributes and C methods, not Python methods, and it must declare all of that type's C attributes and C methods.
    +
    + The implementation part must implement all of the C methods declared in the definition part, and may not add any further C attributes. It may also define Python methods.

    Here is an example of a module which defines and exports an extension type, and another module which uses it.
    -   + +   +
    + + + + + + + + + + + + + + + -
    Shrubbing.pxdShrubbing.pyx
    cdef class Shrubbery:
    +     cdef int width
    +     cdef int length
    cdef class Shrubbery:
    +     def __new__(self, int w, int l):
    +         self.width = w
    +         self.length = l -

    def standard_shrubbery():
    + +

    def standard_shrubbery():
    +     return Shrubbery(3, 7)

    +
    Landscaping.pyx
    cimport Shrubbing
    - import Shrubbing

    cdef Shrubbing.Shrubbery sh
    + + import Shrubbing +

    cdef Shrubbing.Shrubbery sh
    + sh = Shrubbing.standard_shrubbery()
    + print "Shrubbery size is %d x %d" % (sh.width, sh.length)
    +  

    +
    + + + + +

    -

    Some things to note about this example:

    -
      + + +

      Some things to note about this example:

      + + +
        +
      • There is a cdef class Shrubbery declaration in both Shrubbing.pxd and Shrubbing.pyx. When the Shrubbing module is compiled, these two declarations are combined into one.
      • +  
      • In Landscaping.pyx, the cimport Shrubbing declaration allows us to refer to the Shrubbery type as Shrubbing.Shrubbery. -But it doesn't bind the name Shrubbery in Landscaping's module namespace - at run time, so to access Shrubbery.standard_shrubbery we also +But it doesn't bind the name Shrubbing in Landscaping's module namespace + at run time, so to access Shrubbing.standard_shrubbery we also need to import Shrubbing.
      • -
      -
      Back to the Language Overview + + +
    + + +
    Back to the Language Overview
    +
    - \ No newline at end of file + + + Modified: lxml/pyrex/Doc/special_methods.html ============================================================================== --- lxml/pyrex/Doc/special_methods.html (original) +++ lxml/pyrex/Doc/special_methods.html Wed Feb 14 15:20:32 2007 @@ -1,10 +1,22 @@ - - - Special Methods of Extenstion Types + + + + + + + + + Special Methods of Extenstion Types + + -


    Special Methods of Extension Types + + +

    +
    Special Methods of Extension Types

    + This page describes the special methods currently supported by Pyrex extension types. A complete list of all the special methods appears in the table at the bottom. Some of these methods behave differently from their Python counterparts @@ -13,15 +25,21 @@ types, defined with the cdef class statement. It doesn't apply  to classes defined with the Python class statement, where the normal Python rules apply.

    -

    Declaration

    Special methods of extension types must be declared with def, not cdef.
    + + +

    Declaration

    +Special methods of extension types must be declared with def, not cdef.
    +

    Docstrings

    + Currently, docstrings are not fully supported in special methods of extension types. You can place a docstring in the source to serve as a comment, but it won't show up in the corresponding __doc__ attribute at run time. (This is a Python limitation -- there's nowhere in the PyTypeObject data structure to put such docstrings.)

    Initialisation methods: __new__ and __init__

    + There are two methods concerned with initialising the object.

    The __new__ method is where you should perform basic C-level initialisation of the object, including allocation of any C data structures @@ -29,30 +47,41 @@ method, because the object may not yet be a valid Python object when it is called. Therefore, you must not invoke any Python operations which might touch the object; in particular, do not try to call any of its methods.

    -

    Unlike the corresponding method in Python, your __new__ method + + +

    Unlike the corresponding method in Python, your __new__ method is not responsible for creating the object. By the time your __new__ method is called, memory has been allocated for the object and any C attributes it has have been initialised to 0 or null. (Any Python attributes have also been initialised to None, but you probably shouldn't rely on that.) Your __new__ method is guaranteed to be called exactly once.
    +
    + If your extension type has a base type, the __new__ method of the base type is automatically called before your __new__ method is called; you cannot explicitly call the inherited __new__ method. If you need to pass a modified argument list to the base type, you will have to do the relevant part of the initialisation in the __init__ method instead (where the normal rules for calling inherited methods apply).
    +

    -

    Note that the first parameter of the __new__ method is the object + + +

    Note that the first parameter of the __new__ method is the object to be initialised, not the class of the object as it is in Python.

    -

    Any initialisation which cannot safely be done in the __new__ + + +

    Any initialisation which cannot safely be done in the __new__ method should be done in the __init__ method. By the time __init__ is called, the object is a fully valid Python object and all operations are safe. Under some circumstances it is possible for __init__ to be called more than once or not to be called at all, so your other methods should be designed to be robust in such situations.

    -

    Keep in mind that any arguments passed to the constructor will be passed + + +

    Keep in mind that any arguments passed to the constructor will be passed to the __new__ method as well as the __init__ method. If you anticipate subclassing your extension type in Python, you may find it useful to give the __new__ method * and ** arguments so that @@ -60,7 +89,10 @@ which has an __init__ with a different signature will have to override __new__ as well as __init__, which the writer of a Python class wouldn't expect to have to do.

    -

    Finalization method: __dealloc__

    + + +

    Finalization method: __dealloc__

    + The counterpart to the __new__ method is the __dealloc__ method, which should perform the inverse of the __new__ method. Any C data structures that you allocated in your __new__ method @@ -72,15 +104,23 @@ touch the object. In particular, don't call any other methods of the object or do anything which might cause the object to be resurrected. It's best if you stick to just deallocating C data.

    -

    You don't need to worry about deallocating Python attributes of your object, + + +

    You don't need to worry about deallocating Python attributes of your object, because that will be done for you by Pyrex after your __dealloc__ method returns.
    +
    + Note: There is no __del__ method for extension types. (Earlier versions of the Pyrex documentation stated that there was, but this turned out to be incorrect.)
    +

    -

    Arithmetic methods

    + + +

    Arithmetic methods

    + Arithmetic operator methods, such as __add__, behave differently from their Python counterparts. There are no separate "reversed" versions of these methods (__radd__, etc.) Instead, if the first operand @@ -90,509 +130,1002 @@ being "self", and you should test the types of both operands before deciding what to do. If you can't handle the combination of types you've been given, you should return NotImplemented.

    -

    This also applies to the in-place arithmetic method __ipow__. + + +

    This also applies to the in-place arithmetic method __ipow__. It doesn't apply to any of the other in-place methods (__iadd__, etc.) which always take self as the first argument.

    -

    Rich comparisons

    + + +

    Rich comparisons

    + There are no separate methods for the individual rich comparison operations (__eq__, __le__, etc.) Instead there is a single method __richcmp__ which takes an integer indicating which operation is to be performed, as follows:
        <     0        ==    2        >     4
        <=    1        !=    3        >=    5
    -

    The __next__ method

    + + +
+ + +

The __next__ method

+ Extension types wishing to implement the iterator interface should define a method called __next__, not next. The Python system will automatically supply a next method which calls your __next__. Do NOT explicitly give your type a next method, -or bad things could happen (see note 3). +or bad things could happen.

Special Method Table

+ This table lists all of the special methods together with their parameter and return types. A parameter named self is of the type the method belongs to. Other untyped parameters are generic Python objects.

You don't have to declare your method as taking these parameter types. If you declare different types, conversions will be performed as necessary.
-   + +   +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + -
Name                Parameters             Return type  Description
General
__new__             self, ...                           Basic initialisation (no direct Python equivalent)
__init__            self, ...                           Further initialisation
__dealloc__         self                                Basic deallocation (no direct Python equivalent)
__cmp__             x, y                   int          3-way comparison
__richcmp__         x, y, int op           object       Rich comparison (no direct Python equivalent)
__str__             self                   object       str(self)
__repr__            self                   object       repr(self)
__hash__            self                   int          Hash function
__call__            self, ...              object       self(...)
__iter__            self                   object       Return iterator for sequence
__getattr__         self, name             object       Get attribute
__setattr__         self, name, val                     Set attribute
__delattr__         self, name                          Delete attribute
Arithmetic operators
__add__             x, y                   object       binary + operator
__sub__             x, y                   object       binary - operator
__mul__             x, y                   object       * operator
__div__             x, y                   object       / operator for old-style division
__floordiv__        x, y                   object       // operator
__truediv__         x, y                   object       / operator for new-style division
__mod__             x, y                   object       % operator
__divmod__          x, y                   object       combined div and mod
__pow__             x, y, z                object       ** operator or pow(x, y, z)
__neg__             self                   object       unary - operator
__pos__             self                   object       unary + operator
__abs__             self                   object       absolute value
__nonzero__         self                   int          convert to boolean
__invert__          self                   object       ~ operator
__lshift__          x, y                   object       << operator
__rshift__          x, y                   object       >> operator
__and__             x, y                   object       & operator
__or__              x, y                   object       | operator
__xor__             x, y                   object       ^ operator
Numeric conversions
__int__             self                   object       Convert to integer
__long__            self                   object       Convert to long integer
__float__           self                   object       Convert to float
__oct__             self                   object       Convert to octal
__hex__             self                   object       Convert to hexadecimal
In-place arithmetic operators
__iadd__            self, x                object       += operator
__isub__            self, x                object       -= operator
__imul__            self, x                object       *= operator
__idiv__            self, x                object       /= operator for old-style division
__ifloordiv__       self, x                object       //= operator
__itruediv__        self, x                object       /= operator for new-style division
__imod__            self, x                object       %= operator
__ipow__            x, y, z                object       **= operator
__ilshift__         self, x                object       <<= operator
__irshift__         self, x                object       >>= operator
__iand__            self, x                object       &= operator
__ior__             self, x                object       |= operator
__ixor__            self, x                object       ^= operator
Sequences and mappings
__len__             self                   int          len(self)
__getitem__         self, x                object       self[x]
__setitem__         self, x, y                          self[x] = y
__delitem__         self, x                             del self[x]
__getslice__        self, int i, int j     object       self[i:j]
__setslice__        self, int i, int j, x               self[i:j] = x
__delslice__        self, int i, int j                  del self[i:j]
__contains__        self, x                int          x in self
Iterators
__next__            self                   object       Get next item (called next in Python)
Buffer interface (no Python equivalents - see note 1)
__getreadbuffer__   self, int i, void **p
__getwritebuffer__  self, int i, void **p
__getsegcount__     self, int *p
__getcharbuffer__   self, int i, char **p
Descriptor objects (no Python equivalents - see note 2)
__get__             self, instance, class  object       Get value of attribute
__set__             self, instance, value               Set value of attribute
__delete__          self, instance                      Delete attribute
+ + + + +

-

Note 1: The buffer interface is intended for use by C code and is not + + +

Note 1: The buffer interface is intended for use by C code and is not directly accessible from Python. It is described in the Python/C API Reference Manual under sections 6.6 and 10.6.

-

Note 2: Descriptor objects are part of the support mechanism for new-style + + +

Note 2: Descriptor objects are part of the support mechanism for new-style Python classes. See the discussion of descriptors in the Python documentation. See also PEP 252, "Making Types Look More Like Classes", and PEP 253, "Subtyping Built-In Types".

-

Note 3: If your type defines a __new__ method, any method called - new that you define will be overwritten with the system-supplied - new at module import time.

-
+
- \ No newline at end of file + + + Added: lxml/pyrex/Makefile ============================================================================== --- (empty file) +++ lxml/pyrex/Makefile Wed Feb 14 15:20:32 2007 @@ -0,0 +1,18 @@ +VERSION = 0.9.5.1a + +version: + @echo "Setting version to $(VERSION)" + @echo "version = '$(VERSION)'" > Pyrex/Compiler/Version.py + +#check_contents: +# @if [ ! -d Pyrex/Distutils ]; then \ +# echo Pyrex/Distutils missing; \ +# exit 1; \ +# fi + +clean: + @echo Cleaning Source + @rm -f *.pyc */*.pyc */*/*.pyc + @rm -f *~ */*~ */*/*~ + @rm -f core */core + @(cd Demos; $(MAKE) clean) Modified: lxml/pyrex/Pyrex/Compiler/Code.py ============================================================================== --- lxml/pyrex/Pyrex/Compiler/Code.py (original) +++ lxml/pyrex/Pyrex/Compiler/Code.py Wed Feb 14 15:20:32 2007 @@ -22,8 +22,9 @@ in_try_finally = 0 - def __init__(self, outfile_name): - self.f = open_new_file(outfile_name) + def __init__(self, f): + #self.f = open_new_file(outfile_name) + self.f = f self.level = 0 self.bol = 1 self.marker = None @@ -80,6 +81,7 @@ def init_labels(self): self.label_counter = 0 + self.labels_used = {} self.return_label = self.new_label() self.new_error_label() self.continue_label = None @@ -134,9 +136,17 @@ new_labels.append(old_label) self.set_all_labels(new_labels) return old_labels + + def use_label(self, lbl): + self.labels_used[lbl] = 1 def put_label(self, lbl): - self.putln("%s:;" % lbl) + if lbl in self.labels_used: + self.putln("%s:;" % lbl) + + def put_goto(self, lbl): + self.use_label(lbl) + self.putln("goto %s;" % lbl) def put_var_declarations(self, entries, static = 0, dll_linkage = None, definition = True): @@ -146,28 +156,23 @@ def put_var_declaration(self, entry, static = 0, dll_linkage = None, definition = True): - #print "Code.put_var_declaration:", entry.name, "definition =", definition + #print "Code.put_var_declaration:", entry.name, "definition =", definition ### visibility = 
entry.visibility if visibility == 'private' and not definition: return + if not entry.used and visibility == "private": + return + storage_class = "" if visibility == 'extern': storage_class = Naming.extern_c_macro elif visibility == 'public': - if definition: - storage_class = "" - else: + if not definition: storage_class = Naming.extern_c_macro elif visibility == 'private': if static: storage_class = "static" - else: - storage_class = "" if storage_class: self.put("%s " % storage_class) - #if visibility == 'extern' or visibility == 'public' and not definition: - # self.put("%s " % Naming.extern_c_macro) - #elif static and visibility <> 'public': - # self.put("static ") if visibility <> 'public': dll_linkage = None self.put(entry.type.declaration_code(entry.cname, @@ -186,10 +191,6 @@ def as_pyobject(self, cname, type): return typecast(py_object_type, type, cname) - #if type.is_extension_type and type.base_type: - # return "(PyObject *)" + cname - #else: - # return cname def put_incref(self, cname, type): self.putln("Py_INCREF(%s);" % self.as_pyobject(cname, type)) @@ -231,12 +232,13 @@ self.putln("Py_XDECREF(%s); %s = 0;" % ( self.entry_as_pyobject(entry), entry.cname)) - def put_var_decrefs(self, entries): + def put_var_decrefs(self, entries, used_only = 0): for entry in entries: - if entry.xdecref_cleanup: - self.put_var_xdecref(entry) - else: - self.put_var_decref(entry) + if not used_only or entry.used: + if entry.xdecref_cleanup: + self.put_var_xdecref(entry) + else: + self.put_var_decref(entry) def put_var_xdecrefs(self, entries): for entry in entries: @@ -269,13 +271,15 @@ term)) def error_goto(self, pos): + lbl = self.error_label + self.use_label(lbl) return "{%s = %s[%s]; %s = %s; goto %s;}" % ( Naming.filename_cname, Naming.filetable_cname, self.lookup_filename(pos[0]), Naming.lineno_cname, pos[1], - self.error_label) + lbl) def lookup_filename(self, filename): try: Modified: lxml/pyrex/Pyrex/Compiler/ExprNodes.py 
============================================================================== --- lxml/pyrex/Pyrex/Compiler/ExprNodes.py (original) +++ lxml/pyrex/Pyrex/Compiler/ExprNodes.py Wed Feb 14 15:20:32 2007 @@ -8,7 +8,7 @@ import Naming from Nodes import Node import PyrexTypes -from PyrexTypes import py_object_type, typecast +from PyrexTypes import py_object_type, c_long_type, typecast import Symtab import Options @@ -89,13 +89,23 @@ # - If a temporary was allocated, call release_temp on # all sub-expressions. # - # A default implementation of allocate_temps is - # provided which uses the following abstract method: - # - # calculate_result_code - # - Return a C code fragment evaluating to - # the result. This is only called when the - # result is not a temporary. + # allocate_target_temps + # - Call allocate_temps on sub-nodes and allocate any other + # temps used during assignment. + # - Fill in result_code with a C lvalue if needed. + # - If a rhs node is supplied, call release_temp on it. + # - Call release_temp on sub-nodes and release any other + # temps used during assignment. + # + # calculate_result_code + # - Return a C code fragment evaluating to + # the result. This is only called when the + # result is not a temporary. + # + # target_code + # Called by the default implementation of allocate_target_temps. + # Should return a C lvalue for assigning to the node. The default + # implementation calls calculate_result_code. # # check_const # - Check that this node and its subnodes form a @@ -145,16 +155,6 @@ # - Generate code to perform the deletion. # - Call generate_disposal_code on all sub-expressions. # - # #result_as_extension_type ### OBSOLETE ### - # # Normally, the results of all nodes whose type - # # is a Python object, either generic or an extension - # # type, are returned as a generic Python object, so - # # that they can be passed directly to Python/C API - # # routines. This method is called to obtain the - # # result as the actual type of the node. 
It is only - # # called when the type is known to actually be an - # # extension type, and nodes whose result can never - # # be an extension type need not implement it. # is_sequence_constructor = 0 @@ -231,12 +231,12 @@ self.analyse_types(env) self.allocate_temps(env) - def analyse_target_expression(self, env): + def analyse_target_expression(self, env, rhs): # Convenience routine performing both the Type # Analysis and Temp Allocation phases for the LHS of # an assignment. self.analyse_target_types(env) - self.allocate_target_temps(env) + self.allocate_target_temps(env, rhs) def analyse_boolean_expression(self, env): # Analyse expression and coerce to a boolean. @@ -298,12 +298,15 @@ # a subnode. return self.is_temp - def allocate_target_temps(self, env): - # Perform allocate_temps for the LHS of an assignment. + def allocate_target_temps(self, env, rhs): + # Perform temp allocation for the LHS of an assignment. if debug_temp_alloc: print self, "Allocating target temps" self.allocate_subexpr_temps(env) self.result_code = self.target_code() + if rhs: + rhs.release_temp(env) + self.release_subexpr_temps(env) def allocate_temps(self, env, result = None): # Allocate temporary variables for this node and @@ -362,9 +365,9 @@ def calculate_result_code(self): self.not_implemented("calculate_result_code") - def release_target_temp(self, env): - # Release temporaries used by LHS of an assignment. - self.release_subexpr_temps(env) +# def release_target_temp(self, env): +# # Release temporaries used by LHS of an assignment. 
+# self.release_subexpr_temps(env) def release_temp(self, env): # If this node owns a temporary result, release it, @@ -576,7 +579,7 @@ class NullNode(ConstNode): type = PyrexTypes.c_null_ptr_type - value = "0" + value = "NULL" class CharNode(ConstNode): @@ -703,13 +706,14 @@ def analyse_entry(self, env): self.check_identifier_kind() - self.type = self.entry.type - if self.entry.is_declared_generic: + entry = self.entry + self.type = entry.type + if entry.is_declared_generic: self.result_ctype = py_object_type - # Reference to C array turns into pointer to first element. - while self.type.is_array: - self.type = self.type.element_ptr_type() - if self.entry.is_pyglobal or self.entry.is_builtin: + ## Reference to C array turns into pointer to first element. + #while self.type.is_array: + # self.type = self.type.element_ptr_type() + if entry.is_pyglobal or entry.is_builtin: assert self.type.is_pyobject, "Python global or builtin not a Python object" self.is_temp = 1 if Options.intern_names: @@ -727,7 +731,9 @@ self.type = PyrexTypes.error_type def check_identifier_kind(self): + #print "NameNode.check_identifier_kind:", self.entry.name ### entry = self.entry + entry.used = 1 if not (entry.is_const or entry.is_variable or entry.is_builtin or entry.is_cfunction): if self.entry.as_variable: @@ -1071,11 +1077,11 @@ self.index.py_result(), rhs.py_result(), code.error_goto(self.pos))) - self.generate_subexpr_disposal_code(code) else: code.putln( "%s = %s;" % ( self.result_code, rhs.result_code)) + self.generate_subexpr_disposal_code(code) rhs.generate_disposal_code(code) def generate_deletion_code(self, code): @@ -1220,9 +1226,12 @@ function.obj = CloneNode(self.self) func_type = self.function_type() if func_type.is_pyobject: - self.arg_tuple = TupleNode(self.pos, args = self.args) + if self.args: + self.arg_tuple = TupleNode(self.pos, args = self.args) + self.arg_tuple.analyse_types(env) + else: + self.arg_tuple = None self.args = None - self.arg_tuple.analyse_types(env) 
self.type = py_object_type self.is_temp = 1 else: @@ -1309,11 +1318,15 @@ def generate_result_code(self, code): func_type = self.function_type() if func_type.is_pyobject: + if self.arg_tuple: + arg_code = self.arg_tuple.py_result() + else: + arg_code = "0" code.putln( - "%s = PyObject_Call(%s, %s, 0); if (!%s) %s" % ( + "%s = PyObject_CallObject(%s, %s); if (!%s) %s" % ( self.result_code, self.function.py_result(), - self.arg_tuple.py_result(), + arg_code, self.result_code, code.error_goto(self.pos))) elif func_type.is_cfunction: @@ -1535,9 +1548,9 @@ self.analyse_attribute(env) if self.entry and self.entry.is_cmethod and not self.is_called: error(self.pos, "C method can only be called") - # Reference to C array turns into pointer to first element. - while self.type.is_array: - self.type = self.type.element_ptr_type() + ## Reference to C array turns into pointer to first element. + #while self.type.is_array: + # self.type = self.type.element_ptr_type() if self.is_py_attr: if not target: self.is_temp = 1 @@ -1568,6 +1581,8 @@ obj_type = PyrexTypes.error_type self.entry = entry if entry: + if obj_type.is_extension_type and entry.name == "__weakref__": + error(self.pos, "Illegal use of special attribute __weakref__") if entry.is_variable or entry.is_cmethod: self.type = entry.type self.member = entry.cname @@ -1662,7 +1677,6 @@ code.error_goto(self.pos))) rhs.generate_disposal_code(code) else: - #select_code = self.select_code() select_code = self.result_code if self.type.is_pyobject: rhs.make_owned_reference(code) @@ -1670,7 +1684,8 @@ code.putln( "%s = %s;" % ( select_code, - rhs.result_code)) + rhs.result_as(self.ctype()))) + #rhs.result_code)) rhs.generate_post_assignment_code(code) self.obj.generate_disposal_code(code) @@ -1704,6 +1719,7 @@ # Contains common code for performing sequence unpacking. 
# # args [ExprNode] + # iterator ExprNode # unpacked_items [ExprNode] or None # coerced_unpacked_items [ExprNode] or None @@ -1725,11 +1741,11 @@ self.is_temp = 1 def analyse_target_types(self, env): - self.unpacked_items = [] # PyTempNode(self.pos, env) + self.iterator = PyTempNode(self.pos, env) + self.unpacked_items = [] self.coerced_unpacked_items = [] for arg in self.args: arg.analyse_target_types(env) - #node = CloneNode(self.unpacked_item) unpacked_item = PyTempNode(self.pos, env) coerced_unpacked_item = unpacked_item.coerce_to(arg.type, env) self.unpacked_items.append(unpacked_item) @@ -1737,27 +1753,39 @@ self.type = py_object_type env.use_utility_code(unpacking_utility_code) - def allocate_target_temps(self, env): - for arg in self.args: - arg.allocate_target_temps(env) - for node in self.coerced_unpacked_items: + def allocate_target_temps(self, env, rhs): + self.iterator.allocate_temps(env) + if rhs: + rhs.release_temp(env) + for arg, node in zip(self.args, self.coerced_unpacked_items): node.allocate_temps(env) - - def release_target_temp(self, env): - for arg in self.args: - arg.release_target_temp(env) - for node in self.coerced_unpacked_items: - node.release_temp(env) + arg.allocate_target_temps(env, node) + #arg.release_target_temp(env) + #node.release_temp(env) + self.iterator.release_temp(env) + +# def release_target_temp(self, env): +# #for arg in self.args: +# # arg.release_target_temp(env) +# #for node in self.coerced_unpacked_items: +# # node.release_temp(env) +# self.iterator.release_temp(env) def generate_result_code(self, code): self.generate_operation_code(code) def generate_assignment_code(self, rhs, code): + code.putln( + "%s = PyObject_GetIter(%s); if (!%s) %s" % ( + self.iterator.result_code, + rhs.py_result(), + self.iterator.result_code, + code.error_goto(self.pos))) + rhs.generate_disposal_code(code) for i in range(len(self.args)): item = self.unpacked_items[i] - unpack_code = "__Pyx_UnpackItem(%s, %s)" % ( - rhs.py_result(), - i) + 
unpack_code = "__Pyx_UnpackItem(%s)" % ( + self.iterator.py_result()) code.putln( "%s = %s; if (!%s) %s" % ( item.result_code, @@ -1768,14 +1796,13 @@ value_node.generate_evaluation_code(code) self.args[i].generate_assignment_code(value_node, code) code.putln( - "if (__Pyx_EndUnpack(%s, %s) < 0) %s" % ( - rhs.py_result(), - len(self.args), + "if (__Pyx_EndUnpack(%s) < 0) %s" % ( + self.iterator.py_result(), code.error_goto(self.pos))) if debug_disposal_code: print "UnpackNode.generate_assignment_code:" print "...generating disposal code for", rhs - rhs.generate_disposal_code(code) + self.iterator.generate_disposal_code(code) class TupleNode(SequenceNode): @@ -2322,6 +2349,10 @@ def analyse_c_operation(self, env): type1 = self.operand1.type type2 = self.operand2.type + if type1.is_enum: + type1 = PyrexTypes.c_int_type + if type2.is_enum: + type2 = PyrexTypes.c_int_type self.type = self.compute_c_result_type(type1, type2) if not self.type: self.type_error() @@ -2560,10 +2591,13 @@ return 1 if type1.is_pyobject: # type2 will be, too return 1 - elif type1.is_ptr: + elif type1.is_ptr or type1.is_array: return type1.is_null_ptr or type2.is_null_ptr \ - or type1.same_as(type2) - elif (type1.is_numeric and type2.is_numeric + or ((type2.is_ptr or type2.is_array) + and type1.base_type.same_as(type2.base_type)) + elif ((type1.is_numeric and type2.is_numeric + or type1.is_enum and (type1 is type2 or type2.is_int) + or type1.is_int and type2.is_enum) and op not in ('is', 'is_not')): return 1 else: @@ -2819,9 +2853,6 @@ self.type = new_type def calculate_result_code(self): - #return "((%s)%s)" % ( - # self.type.declaration_code(""), - # self.arg.result) return self.arg.result_as(self.type) def generate_result_code(self, code): @@ -2852,6 +2883,8 @@ return self.arg.result_code def generate_result_code(self, code): + if isinstance(self.arg, NoneNode): + return if self.type.typeobj_is_available(): code.putln( "if (!__Pyx_TypeTest(%s, %s)) %s" % ( @@ -2904,12 +2937,14 @@ "Obtaining 
char * from temporary Python value") def generate_result_code(self, code): - #opnd = self.arg.py_result() function = self.type.from_py_function - code.putln('%s = %s(%s); if (PyErr_Occurred()) %s' % ( + operand = self.arg.py_result() + rhs = "%s(%s)" % (function, operand) + if self.type.is_enum: + rhs = typecast(self.type, c_long_type, rhs) + code.putln('%s = %s; if (PyErr_Occurred()) %s' % ( self.result_code, - function, - self.arg.py_result(), + rhs, code.error_goto(self.pos))) @@ -2995,8 +3030,10 @@ # #------------------------------------------------------------------------------------ -get_name_utility_code = \ +get_name_utility_code = [ """ +static PyObject *__Pyx_GetName(PyObject *dict, char *name); /*proto*/ +""",""" static PyObject *__Pyx_GetName(PyObject *dict, char *name) { PyObject *result; result = PyObject_GetAttrString(dict, name); @@ -3004,10 +3041,12 @@ PyErr_SetString(PyExc_NameError, name); return result; } -""" +"""] -get_name_interned_utility_code = \ +get_name_interned_utility_code = [ """ +static PyObject *__Pyx_GetName(PyObject *dict, PyObject *name); /*proto*/ +""",""" static PyObject *__Pyx_GetName(PyObject *dict, PyObject *name) { PyObject *result; result = PyObject_GetAttr(dict, name); @@ -3015,12 +3054,14 @@ PyErr_SetObject(PyExc_NameError, name); return result; } -""" +"""] #------------------------------------------------------------------------------------ -import_utility_code = \ +import_utility_code = [ """ +static PyObject *__Pyx_Import(PyObject *name, PyObject *from_list); /*proto*/ +""",""" static PyObject *__Pyx_Import(PyObject *name, PyObject *from_list) { PyObject *__import__ = 0; PyObject *empty_list = 0; @@ -3056,12 +3097,14 @@ """ % { "BUILTINS": Naming.builtins_cname, "GLOBALS": Naming.module_cname, -} +}] #------------------------------------------------------------------------------------ -get_exception_utility_code = \ +get_exception_utility_code = [ """ +static PyObject *__Pyx_GetExcValue(void); /*proto*/ +""",""" 
static PyObject *__Pyx_GetExcValue(void) { PyObject *type = 0, *value = 0, *tb = 0; PyObject *result = 0; @@ -3091,41 +3134,48 @@ Py_XDECREF(tb); return result; } -""" +"""] #------------------------------------------------------------------------------------ -unpacking_utility_code = \ +unpacking_utility_code = [ """ +static PyObject *__Pyx_UnpackItem(PyObject *); /*proto*/ +static int __Pyx_EndUnpack(PyObject *); /*proto*/ +""",""" static void __Pyx_UnpackError(void) { PyErr_SetString(PyExc_ValueError, "unpack sequence of wrong size"); } -static PyObject *__Pyx_UnpackItem(PyObject *seq, Py_ssize_t i) { - PyObject *item; - if (!(item = PySequence_GetItem(seq, i))) { - if (PyErr_ExceptionMatches(PyExc_IndexError)) - __Pyx_UnpackError(); - } - return item; +static PyObject *__Pyx_UnpackItem(PyObject *iter) { + PyObject *item; + if (!(item = PyIter_Next(iter))) { + if (!PyErr_Occurred()) + __Pyx_UnpackError(); + } + return item; } -static int __Pyx_EndUnpack(PyObject *seq, Py_ssize_t i) { - PyObject *item; - if (item = PySequence_GetItem(seq, i)) { - Py_DECREF(item); - __Pyx_UnpackError(); - return -1; - } - PyErr_Clear(); - return 0; +static int __Pyx_EndUnpack(PyObject *iter) { + PyObject *item; + if ((item = PyIter_Next(iter))) { + Py_DECREF(item); + __Pyx_UnpackError(); + return -1; + } + else if (!PyErr_Occurred()) + return 0; + else + return -1; } -""" +"""] #------------------------------------------------------------------------------------ -type_test_utility_code = \ +type_test_utility_code = [ """ +static int __Pyx_TypeTest(PyObject *obj, PyTypeObject *type); /*proto*/ +""",""" static int __Pyx_TypeTest(PyObject *obj, PyTypeObject *type) { if (!type) { PyErr_Format(PyExc_SystemError, "Missing type object"); @@ -3137,12 +3187,14 @@ obj->ob_type->tp_name, type->tp_name); return 0; } -""" +"""] #------------------------------------------------------------------------------------ -create_class_utility_code = \ +create_class_utility_code = [ """ +static PyObject 
*__Pyx_CreateClass(PyObject *bases, PyObject *dict, PyObject *name, char *modname); /*proto*/ +""",""" static PyObject *__Pyx_CreateClass( PyObject *bases, PyObject *dict, PyObject *name, char *modname) { @@ -3159,6 +3211,6 @@ Py_XDECREF(py_modname); return result; } -""" +"""] #------------------------------------------------------------------------------------ Added: lxml/pyrex/Pyrex/Compiler/ModuleNode.py ============================================================================== --- (empty file) +++ lxml/pyrex/Pyrex/Compiler/ModuleNode.py Wed Feb 14 15:20:32 2007 @@ -0,0 +1,1371 @@ +# +# Pyrex - Module parse tree node +# + +import os, time +from cStringIO import StringIO + +import Code +import Naming +import Nodes +import Options +import PyrexTypes +import TypeSlots +import Version + +from Errors import error +from PyrexTypes import py_object_type +from Pyrex.Utils import open_new_file, replace_suffix + +class ModuleNode(Nodes.Node, Nodes.BlockNode): + # doc string or None + # body StatListNode + + def analyse_declarations(self, env): + env.doc = self.doc + self.body.analyse_declarations(env) + + def process_implementation(self, env, result): + self.analyse_declarations(env) + env.check_c_classes() + self.body.analyse_expressions(env) + env.return_type = PyrexTypes.c_void_type + self.generate_c_code(env, result) + self.generate_h_code(env, result) + + def generate_h_code(self, env, result): + public_vars = [] + public_funcs = [] + public_extension_types = [] + for entry in env.var_entries: + if entry.visibility == 'public': + public_vars.append(entry) + for entry in env.cfunc_entries: + if entry.visibility == 'public': + public_funcs.append(entry) + for entry in env.c_class_entries: + if entry.visibility == 'public': + public_extension_types.append(entry) + if public_vars or public_funcs or public_extension_types: + result.h_file = replace_suffix(result.c_file, ".h") + result.i_file = replace_suffix(result.c_file, ".pxi") + h_code = 
Code.CCodeWriter(open_new_file(result.h_file)) + i_code = Code.PyrexCodeWriter(result.i_file) + header_barrier = "__HAS_PYX_" + env.module_name + h_code.putln("#ifndef %s" % header_barrier) + h_code.putln("#define %s" % header_barrier) + self.generate_extern_c_macro_definition(h_code) + for entry in public_vars: + h_code.putln("%s %s;" % ( + Naming.extern_c_macro, + entry.type.declaration_code( + entry.cname, dll_linkage = "DL_IMPORT"))) + i_code.putln("cdef extern %s" % + entry.type.declaration_code(entry.cname, pyrex = 1)) + for entry in public_extension_types: + self.generate_cclass_header_code(entry.type, h_code) + self.generate_cclass_include_code(entry.type, i_code) + if public_funcs: + sort_public_funcs = [ (func.cname, func) + for func in public_funcs ] + sort_public_funcs.sort() + public_funcs = [ func[1] for func in sort_public_funcs ] + for entry in public_funcs: + h_code.putln( + 'static %s;' % + entry.type.declaration_code("(*%s)" % entry.cname)) + i_code.putln("cdef extern %s" % + entry.type.declaration_code(entry.cname, pyrex = 1)) + h_code.putln( + "static struct {char *s; void **p;} _%s_API[] = {" % + env.module_name) + for entry in public_funcs: + h_code.putln('{"%s", &%s},' % (entry.cname, entry.cname)) + h_code.putln("{0, 0}") + h_code.putln("};") + self.generate_c_api_import_code(env, h_code) + h_code.putln("PyMODINIT_FUNC init%s(void);" % env.module_name) + h_code.putln("#endif /* %s */" % header_barrier) + + def generate_cclass_header_code(self, type, h_code): + #h_code.putln("extern DL_IMPORT(PyTypeObject) %s;" % type.typeobj_cname) + h_code.putln("%s DL_IMPORT(PyTypeObject) %s;" % ( + Naming.extern_c_macro, + type.typeobj_cname)) + self.generate_obj_struct_definition(type, h_code) + + def generate_cclass_include_code(self, type, i_code): + i_code.putln("cdef extern class %s.%s:" % ( + type.module_name, type.name)) + i_code.indent() + var_entries = type.scope.var_entries + if var_entries: + for entry in var_entries: + i_code.putln("cdef %s" 
% + entry.type.declaration_code(entry.cname, pyrex = 1)) + else: + i_code.putln("pass") + i_code.dedent() + + def generate_c_code(self, env, result): + modules = [] + self.find_referenced_modules(env, modules, {}) + #code = Code.CCodeWriter(result.c_file) + code = Code.CCodeWriter(StringIO()) + code.h = Code.CCodeWriter(StringIO()) + code.init_labels() + self.generate_module_preamble(env, modules, code.h) + + code.putln("") + code.putln("/* Implementation of %s */" % env.qualified_name) + self.generate_const_definitions(env, code) + self.generate_interned_name_decls(env, code) + self.generate_py_string_decls(env, code) + self.body.generate_function_definitions(env, code) + self.generate_interned_name_table(env, code) + self.generate_py_string_table(env, code) + self.generate_c_api_table(env, code) + self.generate_typeobj_definitions(env, code) + self.generate_method_table(env, code) + self.generate_filename_init_prototype(code) + self.generate_module_init_func(modules[:-1], env, code) + self.generate_filename_table(code) + self.generate_utility_functions(env, code) + + for module in modules: + self.generate_declarations_for_module(module, code.h, + definition = module is env) + + f = open_new_file(result.c_file) + f.write(code.h.f.getvalue()) + f.write("\n") + f.write(code.f.getvalue()) + f.close() + result.c_file_generated = 1 + + def find_referenced_modules(self, env, module_list, modules_seen): + if env not in modules_seen: + modules_seen[env] = 1 + for imported_module in env.cimported_modules: + self.find_referenced_modules(imported_module, module_list, modules_seen) + module_list.append(env) + + def generate_module_preamble(self, env, cimported_modules, code): + code.putln('/* Generated by Pyrex %s on %s */' % ( + Version.version, time.asctime())) + code.putln('') + code.putln('#define PY_SSIZE_T_CLEAN') + for filename in env.python_include_files: + code.putln('#include "%s"' % filename) + code.putln("#ifndef PY_LONG_LONG") + code.putln(" #define PY_LONG_LONG 
LONG_LONG") + code.putln("#endif") + code.putln("#if PY_VERSION_HEX < 0x02050000") + code.putln(" typedef int Py_ssize_t;") + code.putln(" #define PY_SSIZE_T_MAX INT_MAX") + code.putln(" #define PY_SSIZE_T_MIN INT_MIN") + code.putln(" #define PyInt_FromSsize_t(z) PyInt_FromLong(z)") + code.putln(" #define PyInt_AsSsize_t(o) PyInt_AsLong(o)") + code.putln("#endif") + self.generate_extern_c_macro_definition(code) + code.putln("%s double pow(double, double);" % Naming.extern_c_macro) + self.generate_includes(env, cimported_modules, code) + #for filename in env.include_files: + # code.putln('#include "%s"' % filename) + code.putln('') + code.put(Nodes.utility_function_predeclarations) + #if Options.intern_names: + # code.putln(Nodes.get_name_interned_predeclaration) + #else: + # code.putln(get_name_predeclaration) + code.putln('') + code.putln('static PyObject *%s;' % env.module_cname) + code.putln('static PyObject *%s;' % Naming.builtins_cname) + code.putln('static int %s;' % Naming.lineno_cname) + code.putln('static char *%s;' % Naming.filename_cname) + code.putln('static char **%s;' % Naming.filetable_cname) + if env.doc: + code.putln('') + code.putln('static char %s[] = "%s";' % (env.doc_cname, env.doc)) + + def generate_extern_c_macro_definition(self, code): + name = Naming.extern_c_macro + code.putln("#ifdef __cplusplus") + code.putln('#define %s extern "C"' % name) + code.putln("#else") + code.putln("#define %s extern" % name) + code.putln("#endif") + + def generate_includes(self, env, cimported_modules, code): + includes = env.include_files[:] + for module in cimported_modules: + for filename in module.include_files: + if filename not in includes: + includes.append(filename) + for filename in includes: + code.putln('#include "%s"' % filename) + + def generate_filename_table(self, code): + code.putln("") + code.putln("static char *%s[] = {" % Naming.filenames_cname) + if code.filename_list: + for filename in code.filename_list: + filename = 
os.path.basename(filename) + escaped_filename = filename.replace("\\", "\\\\").replace('"', r'\"') + code.putln('"%s",' % + escaped_filename) + else: + # Some C compilers don't like an empty array + code.putln("0") + code.putln("};") + + def generate_declarations_for_module(self, env, code, definition): + code.putln("") + code.putln("/* Declarations from %s */" % env.qualified_name) + self.generate_type_predeclarations(env, code) + self.generate_type_definitions(env, code) + self.generate_global_declarations(env, code, definition) + self.generate_cfunction_predeclarations(env, code) + + def generate_type_predeclarations(self, env, code): + pass + + def generate_type_definitions(self, env, code): + # Generate definitions of structs/unions/enums. + for entry in env.sue_entries: + if not entry.in_cinclude: + type = entry.type + if type.is_struct_or_union: + self.generate_struct_union_definition(entry, code) + else: + self.generate_enum_definition(entry, code) + # Generate extension type object struct definitions. 
+ for entry in env.c_class_entries: + if not entry.in_cinclude: + self.generate_typeobject_predeclaration(entry, code) + self.generate_obj_struct_definition(entry.type, code) + self.generate_exttype_vtable_struct(entry, code) + self.generate_exttype_vtabptr_declaration(entry, code) + + def sue_header_footer(self, type, kind, name): + if type.typedef_flag: + header = "typedef %s {" % kind + footer = "} %s;" % name + else: + header = "%s %s {" % (kind, name) + footer = "};" + return header, footer + + def generate_struct_union_definition(self, entry, code): + type = entry.type + scope = type.scope + if scope: + header, footer = \ + self.sue_header_footer(type, type.kind, type.cname) + code.putln("") + code.putln(header) + var_entries = scope.var_entries + if not var_entries: + error(entry.pos, + "Empty struct or union definition not allowed outside a" + " 'cdef extern from' block") + for attr in var_entries: + code.putln( + "%s;" % + attr.type.declaration_code(attr.cname)) + code.putln(footer) + + def generate_enum_definition(self, entry, code): + type = entry.type + name = entry.cname or entry.name or "" + header, footer = \ + self.sue_header_footer(type, "enum", name) + code.putln("") + code.putln(header) + enum_values = entry.enum_values + if not enum_values: + error(entry.pos, + "Empty enum definition not allowed outside a" + " 'cdef extern from' block") + else: + last_entry = enum_values[-1] + for value_entry in enum_values: + if value_entry.value == value_entry.name: + value_code = value_entry.cname + else: + value_code = ("%s = %s" % ( + value_entry.cname, + value_entry.value)) + if value_entry is not last_entry: + value_code += "," + code.putln(value_code) + code.putln(footer) + + def generate_typeobject_predeclaration(self, entry, code): + code.putln("") + name = entry.type.typeobj_cname + if name: + if entry.visibility == 'extern' and not entry.in_cinclude: + code.putln("%s DL_IMPORT(PyTypeObject) %s;" % ( + Naming.extern_c_macro, + name)) + elif 
entry.visibility == 'public': + #code.putln("DL_EXPORT(PyTypeObject) %s;" % name) + code.putln("%s DL_EXPORT(PyTypeObject) %s;" % ( + Naming.extern_c_macro, + name)) + # ??? Do we really need the rest of this? ??? + #else: + # code.putln("staticforward PyTypeObject %s;" % name) + + def generate_exttype_vtable_struct(self, entry, code): + # Generate struct declaration for an extension type's vtable. + type = entry.type + scope = type.scope + if type.vtabstruct_cname: + code.putln("") + code.putln( + "struct %s {" % + type.vtabstruct_cname) + if type.base_type and type.base_type.vtabstruct_cname: + code.putln("struct %s %s;" % ( + type.base_type.vtabstruct_cname, + Naming.obj_base_cname)) + for method_entry in scope.cfunc_entries: + if not method_entry.is_inherited: + code.putln( + "%s;" % method_entry.type.declaration_code("(*%s)" % method_entry.name)) + code.putln( + "};") + + def generate_exttype_vtabptr_declaration(self, entry, code): + # Generate declaration of pointer to an extension type's vtable. + type = entry.type + if type.vtabptr_cname: + code.putln("static struct %s *%s;" % ( + type.vtabstruct_cname, + type.vtabptr_cname)) + + def generate_obj_struct_definition(self, type, code): + # Generate object struct definition for an + # extension type. 
+ if not type.scope: + return # Forward declared but never defined + header, footer = \ + self.sue_header_footer(type, "struct", type.objstruct_cname) + code.putln("") + code.putln(header) + base_type = type.base_type + if base_type: + code.putln( + "%s%s %s;" % ( + ("struct ", "")[base_type.typedef_flag], + base_type.objstruct_cname, + Naming.obj_base_cname)) + else: + code.putln( + "PyObject_HEAD") + if type.vtabslot_cname and not (type.base_type and type.base_type.vtabslot_cname): + code.putln( + "struct %s *%s;" % ( + type.vtabstruct_cname, + type.vtabslot_cname)) + for attr in type.scope.var_entries: + code.putln( + "%s;" % + attr.type.declaration_code(attr.cname)) + code.putln(footer) + + def generate_global_declarations(self, env, code, definition): + code.putln("") + for entry in env.c_class_entries: + code.putln("static PyTypeObject *%s = 0;" % + entry.type.typeptr_cname) + code.put_var_declarations(env.var_entries, static = 1, + dll_linkage = "DL_EXPORT", definition = definition) + code.put_var_declarations(env.default_entries, static = 1) + + def generate_cfunction_predeclarations(self, env, code): + for entry in env.cfunc_entries: + if not entry.in_cinclude: + if entry.visibility == 'public': + dll_linkage = "DL_EXPORT" + else: + dll_linkage = None + header = entry.type.declaration_code(entry.cname, + dll_linkage = dll_linkage) + if entry.visibility == 'private': + storage_class = "static " + elif entry.visibility == 'extern': + storage_class = "%s " % Naming.extern_c_macro + else: + storage_class = "" + code.putln("%s%s; /*proto*/" % ( + storage_class, + header)) + + def generate_typeobj_definitions(self, env, code): + full_module_name = env.qualified_name + for entry in env.c_class_entries: + #print "generate_typeobj_definitions:", entry.name + #print "...visibility =", entry.visibility + if entry.visibility <> 'extern': + type = entry.type + scope = type.scope + if scope: # could be None if there was an error + self.generate_exttype_vtable(scope, 
code) + self.generate_new_function(scope, code) + self.generate_dealloc_function(scope, code) + self.generate_traverse_function(scope, code) + self.generate_clear_function(scope, code) + if scope.defines_any(["__getitem__"]): + self.generate_getitem_int_function(scope, code) + if scope.defines_any(["__setitem__", "__delitem__"]): + self.generate_ass_subscript_function(scope, code) + if scope.defines_any(["__setslice__", "__delslice__"]): + self.generate_ass_slice_function(scope, code) + if scope.defines_any(["__getattr__"]): + self.generate_getattro_function(scope, code) + if scope.defines_any(["__setattr__", "__delattr__"]): + self.generate_setattro_function(scope, code) + if scope.defines_any(["__get__"]): + self.generate_descr_get_function(scope, code) + if scope.defines_any(["__set__", "__delete__"]): + self.generate_descr_set_function(scope, code) + self.generate_property_accessors(scope, code) + self.generate_method_table(scope, code) + self.generate_member_table(scope, code) + self.generate_getset_table(scope, code) + self.generate_typeobj_definition(full_module_name, entry, code) + + def generate_exttype_vtable(self, scope, code): + # Generate the definition of an extension type's vtable. 
+ type = scope.parent_type + if type.vtable_cname: + code.putln("static struct %s %s;" % ( + type.vtabstruct_cname, + type.vtable_cname)) + + def generate_self_cast(self, scope, code): + type = scope.parent_type + code.putln( + "%s = (%s)o;" % ( + type.declaration_code("p"), + type.declaration_code(""))) + + def generate_new_function(self, scope, code): + base_type = scope.parent_type.base_type + code.putln("") + code.putln( + "static PyObject *%s(PyTypeObject *t, PyObject *a, PyObject *k) {" + % scope.mangle_internal("tp_new")) + if base_type: + code.putln( + "PyObject *o = %s->tp_new(t, a, k);" % + base_type.typeptr_cname) + else: + code.putln( + "PyObject *o = (*t->tp_alloc)(t, 0);") + type = scope.parent_type + py_attrs = [] + for entry in scope.var_entries: + if entry.type.is_pyobject: + py_attrs.append(entry) + if type.vtabslot_cname or py_attrs: + self.generate_self_cast(scope, code) + if type.vtabslot_cname: + code.putln("*(struct %s **)&p->%s = %s;" % ( + type.vtabstruct_cname, + type.vtabslot_cname, + type.vtabptr_cname)) + for entry in py_attrs: + if entry.name == "__weakref__": + code.putln("p->%s = 0;" % entry.cname) + else: + code.put_init_var_to_py_none(entry, "p->%s") + entry = scope.lookup_here("__new__") + if entry: + code.putln( + "if (%s(o, a, k) < 0) {" % + entry.func_cname) + code.put_decref_clear("o", py_object_type); + code.putln( + "}") + code.putln( + "return o;") + code.putln( + "}") + + def generate_dealloc_function(self, scope, code): + base_type = scope.parent_type.base_type + code.putln("") + code.putln( + "static void %s(PyObject *o) {" + % scope.mangle_internal("tp_dealloc")) + py_attrs = [] + for entry in scope.var_entries: + if entry.type.is_pyobject and entry.name <> "__weakref__": + py_attrs.append(entry) + if py_attrs: + self.generate_self_cast(scope, code) + self.generate_usr_dealloc_call(scope, code) + if scope.lookup_here("__weakref__"): + code.putln("PyObject_ClearWeakRefs(o);") + for entry in py_attrs: + 
code.put_xdecref("p->%s" % entry.cname, entry.type) + if base_type: + code.putln( + "%s->tp_dealloc(o);" % + base_type.typeptr_cname) + else: + code.putln( + "(*o->ob_type->tp_free)(o);") + code.putln( + "}") + + def generate_usr_dealloc_call(self, scope, code): + entry = scope.lookup_here("__dealloc__") + if entry: + code.putln( + "{") + code.putln( + "PyObject *etype, *eval, *etb;") + code.putln( + "PyErr_Fetch(&etype, &eval, &etb);") + code.putln( + "++o->ob_refcnt;") + code.putln( + "%s(o);" % + entry.func_cname) + code.putln( + "if (PyErr_Occurred()) PyErr_WriteUnraisable(o);") + code.putln( + "--o->ob_refcnt;") + code.putln( + "PyErr_Restore(etype, eval, etb);") + code.putln( + "}") + + def generate_traverse_function(self, scope, code): + base_type = scope.parent_type.base_type + code.putln("") + code.putln( + "static int %s(PyObject *o, visitproc v, void *a) {" + % scope.mangle_internal("tp_traverse")) + py_attrs = [] + for entry in scope.var_entries: + if entry.type.is_pyobject: + py_attrs.append(entry) + if base_type or py_attrs: + code.putln( + "int e;") + if py_attrs: + self.generate_self_cast(scope, code) + if base_type: + code.putln( + "e = %s->tp_traverse(o, v, a); if (e) return e;" % + base_type.typeptr_cname) + for entry in py_attrs: + var_code = "p->%s" % entry.cname + code.putln( + "if (%s) {" + % var_code) + if entry.type.is_extension_type: + var_code = "((PyObject*)%s)" % var_code + code.putln( + "e = (*v)(%s, a); if (e) return e;" + % var_code) + code.putln( + "}") + code.putln( + "return 0;") + code.putln( + "}") + + def generate_clear_function(self, scope, code): + base_type = scope.parent_type.base_type + code.putln("") + code.putln( + "static int %s(PyObject *o) {" + % scope.mangle_internal("tp_clear")) + py_attrs = [] + for entry in scope.var_entries: + if entry.type.is_pyobject: + py_attrs.append(entry) + if py_attrs: + self.generate_self_cast(scope, code) + if base_type: + code.putln( + "%s->tp_clear(o);" % + base_type.typeptr_cname) + 
for entry in py_attrs: + name = "p->%s" % entry.cname + code.put_xdecref(name, entry.type) + code.put_init_var_to_py_none(entry, "p->%s") + code.putln( + "return 0;") + code.putln( + "}") + + def generate_getitem_int_function(self, scope, code): + # This function is put into the sq_item slot when + # a __getitem__ method is present. It converts its + # argument to a Python integer and calls mp_subscript. + code.putln( + "static PyObject *%s(PyObject *o, Py_ssize_t i) {" % + scope.mangle_internal("sq_item")) + code.putln( + "PyObject *r;") + code.putln( + "PyObject *x = PyInt_FromSsize_t(i); if(!x) return 0;") + code.putln( + "r = o->ob_type->tp_as_mapping->mp_subscript(o, x);") + code.putln( + "Py_DECREF(x);") + code.putln( + "return r;") + code.putln( + "}") + + def generate_ass_subscript_function(self, scope, code): + # Setting and deleting an item are both done through + # the ass_subscript method, so we dispatch to user's __setitem__ + # or __delitem__, or raise an exception. + base_type = scope.parent_type.base_type + set_entry = scope.lookup_here("__setitem__") + del_entry = scope.lookup_here("__delitem__") + code.putln("") + code.putln( + "static int %s(PyObject *o, PyObject *i, PyObject *v) {" % + scope.mangle_internal("mp_ass_subscript")) + code.putln( + "if (v) {") + if set_entry: + code.putln( + "return %s(o, i, v);" % + set_entry.func_cname) + else: + self.generate_guarded_basetype_call( + base_type, "tp_as_mapping", "mp_ass_subscript", "o, i, v", code) + code.putln( + "PyErr_Format(PyExc_NotImplementedError,") + code.putln( + ' "Subscript assignment not supported by %s", o->ob_type->tp_name);') + code.putln( + "return -1;") + code.putln( + "}") + code.putln( + "else {") + if del_entry: + code.putln( + "return %s(o, i);" % + del_entry.func_cname) + else: + self.generate_guarded_basetype_call( + base_type, "tp_as_mapping", "mp_ass_subscript", "o, i, v", code) + code.putln( + "PyErr_Format(PyExc_NotImplementedError,") + code.putln( + ' "Subscript deletion 
not supported by %s", o->ob_type->tp_name);') + code.putln( + "return -1;") + code.putln( + "}") + code.putln( + "}") + + def generate_guarded_basetype_call( + self, base_type, substructure, slot, args, code): + if base_type: + base_tpname = base_type.typeptr_cname + if substructure: + code.putln( + "if (%s->%s && %s->%s->%s)" % ( + base_tpname, substructure, base_tpname, substructure, slot)) + code.putln( + " return %s->%s->%s(%s);" % ( + base_tpname, substructure, slot, args)) + else: + code.putln( + "if (%s->%s)" % ( + base_tpname, slot)) + code.putln( + " return %s->%s(%s);" % ( + base_tpname, slot, args)) + + def generate_ass_slice_function(self, scope, code): + # Setting and deleting a slice are both done through + # the ass_slice method, so we dispatch to user's __setslice__ + # or __delslice__, or raise an exception. + base_type = scope.parent_type.base_type + set_entry = scope.lookup_here("__setslice__") + del_entry = scope.lookup_here("__delslice__") + code.putln("") + code.putln( + "static int %s(PyObject *o, Py_ssize_t i, Py_ssize_t j, PyObject *v) {" % + scope.mangle_internal("sq_ass_slice")) + code.putln( + "if (v) {") + if set_entry: + code.putln( + "return %s(o, i, j, v);" % + set_entry.func_cname) + else: + self.generate_guarded_basetype_call( + base_type, "tp_as_sequence", "sq_ass_slice", "o, i, j, v", code) + code.putln( + "PyErr_Format(PyExc_NotImplementedError,") + code.putln( + ' "2-element slice assignment not supported by %s", o->ob_type->tp_name);') + code.putln( + "return -1;") + code.putln( + "}") + code.putln( + "else {") + if del_entry: + code.putln( + "return %s(o, i, j);" % + del_entry.func_cname) + else: + self.generate_guarded_basetype_call( + base_type, "tp_as_sequence", "sq_ass_slice", "o, i, j, v", code) + code.putln( + "PyErr_Format(PyExc_NotImplementedError,") + code.putln( + ' "2-element slice deletion not supported by %s", o->ob_type->tp_name);') + code.putln( + "return -1;") + code.putln( + "}") + code.putln( + "}") + + def 
generate_getattro_function(self, scope, code): + # First try to get the attribute using PyObject_GenericGetAttr. + # If that raises an AttributeError, call the user's __getattr__ + # method. + entry = scope.lookup_here("__getattr__") + code.putln("") + code.putln( + "static PyObject *%s(PyObject *o, PyObject *n) {" + % scope.mangle_internal("tp_getattro")) + code.putln( + "PyObject *v = PyObject_GenericGetAttr(o, n);") + code.putln( + "if (!v && PyErr_ExceptionMatches(PyExc_AttributeError)) {") + code.putln( + "PyErr_Clear();") + code.putln( + "v = %s(o, n);" % + entry.func_cname) + code.putln( + "}") + code.putln( + "return v;") + code.putln( + "}") + + def generate_setattro_function(self, scope, code): + # Setting and deleting an attribute are both done through + # the setattro method, so we dispatch to user's __setattr__ + # or __delattr__ or fall back on PyObject_GenericSetAttr. + base_type = scope.parent_type.base_type + set_entry = scope.lookup_here("__setattr__") + del_entry = scope.lookup_here("__delattr__") + code.putln("") + code.putln( + "static int %s(PyObject *o, PyObject *n, PyObject *v) {" % + scope.mangle_internal("tp_setattro")) + code.putln( + "if (v) {") + if set_entry: + code.putln( + "return %s(o, n, v);" % + set_entry.func_cname) + else: + self.generate_guarded_basetype_call( + base_type, None, "tp_setattro", "o, n, v", code) + code.putln( + "return PyObject_GenericSetAttr(o, n, v);") + code.putln( + "}") + code.putln( + "else {") + if del_entry: + code.putln( + "return %s(o, n);" % + del_entry.func_cname) + else: + self.generate_guarded_basetype_call( + base_type, None, "tp_setattro", "o, n, v", code) + code.putln( + "return PyObject_GenericSetAttr(o, n, 0);") + code.putln( + "}") + code.putln( + "}") + + def generate_descr_get_function(self, scope, code): + # The __get__ function of a descriptor object can be + # called with NULL for the second or third arguments + # under some circumstances, so we replace them with + # None in that case. 
+ user_get_entry = scope.lookup_here("__get__") + code.putln("") + code.putln( + "static PyObject *%s(PyObject *o, PyObject *i, PyObject *c) {" % + scope.mangle_internal("tp_descr_get")) + code.putln( + "PyObject *r = 0;") + code.putln( + "if (!i) i = Py_None;") + code.putln( + "if (!c) c = Py_None;") + #code.put_incref("i", py_object_type) + #code.put_incref("c", py_object_type) + code.putln( + "r = %s(o, i, c);" % + user_get_entry.func_cname) + #code.put_decref("i", py_object_type) + #code.put_decref("c", py_object_type) + code.putln( + "return r;") + code.putln( + "}") + + def generate_descr_set_function(self, scope, code): + # Setting and deleting are both done through the __set__ + # method of a descriptor, so we dispatch to user's __set__ + # or __delete__ or raise an exception. + base_type = scope.parent_type.base_type + user_set_entry = scope.lookup_here("__set__") + user_del_entry = scope.lookup_here("__delete__") + code.putln("") + code.putln( + "static int %s(PyObject *o, PyObject *i, PyObject *v) {" % + scope.mangle_internal("tp_descr_set")) + code.putln( + "if (v) {") + if user_set_entry: + code.putln( + "return %s(o, i, v);" % + user_set_entry.func_cname) + else: + self.generate_guarded_basetype_call( + base_type, None, "tp_descr_set", "o, i, v", code) + code.putln( + 'PyErr_SetString(PyExc_NotImplementedError, "__set__");') + code.putln( + "return -1;") + code.putln( + "}") + code.putln( + "else {") + if user_del_entry: + code.putln( + "return %s(o, i);" % + user_del_entry.func_cname) + else: + self.generate_guarded_basetype_call( + base_type, None, "tp_descr_set", "o, i, v", code) + code.putln( + 'PyErr_SetString(PyExc_NotImplementedError, "__delete__");') + code.putln( + "return -1;") + code.putln( + "}") + code.putln( + "}") + + def generate_property_accessors(self, cclass_scope, code): + for entry in cclass_scope.property_entries: + property_scope = entry.scope + if property_scope.defines_any(["__get__"]): + 
self.generate_property_get_function(entry, code) + if property_scope.defines_any(["__set__", "__del__"]): + self.generate_property_set_function(entry, code) + + def generate_property_get_function(self, property_entry, code): + property_scope = property_entry.scope + property_entry.getter_cname = property_scope.parent_scope.mangle( + Naming.prop_get_prefix, property_entry.name) + get_entry = property_scope.lookup_here("__get__") + code.putln("") + code.putln( + "static PyObject *%s(PyObject *o, void *x) {" % + property_entry.getter_cname) + code.putln( + "return %s(o);" % + get_entry.func_cname) + code.putln( + "}") + + def generate_property_set_function(self, property_entry, code): + property_scope = property_entry.scope + property_entry.setter_cname = property_scope.parent_scope.mangle( + Naming.prop_set_prefix, property_entry.name) + set_entry = property_scope.lookup_here("__set__") + del_entry = property_scope.lookup_here("__del__") + code.putln("") + code.putln( + "static int %s(PyObject *o, PyObject *v, void *x) {" % + property_entry.setter_cname) + code.putln( + "if (v) {") + if set_entry: + code.putln( + "return %s(o, v);" % + set_entry.func_cname) + else: + code.putln( + 'PyErr_SetString(PyExc_NotImplementedError, "__set__");') + code.putln( + "return -1;") + code.putln( + "}") + code.putln( + "else {") + if del_entry: + code.putln( + "return %s(o);" % + del_entry.func_cname) + else: + code.putln( + 'PyErr_SetString(PyExc_NotImplementedError, "__del__");') + code.putln( + "return -1;") + code.putln( + "}") + code.putln( + "}") + + def generate_typeobj_definition(self, modname, entry, code): + type = entry.type + scope = type.scope + for suite in TypeSlots.substructures: + suite.generate_substructure(scope, code) + code.putln("") + if entry.visibility == 'public': + header = "DL_EXPORT(PyTypeObject) %s = {" + else: + #header = "statichere PyTypeObject %s = {" + header = "PyTypeObject %s = {" + #code.putln(header % scope.parent_type.typeobj_cname) + 
code.putln(header % type.typeobj_cname) + code.putln( + "PyObject_HEAD_INIT(0)") + code.putln( + "0, /*ob_size*/") + code.putln( + '"%s.%s", /*tp_name*/' % ( + modname, scope.class_name)) + if type.typedef_flag: + objstruct = type.objstruct_cname + else: + #objstruct = "struct %s" % scope.parent_type.objstruct_cname + objstruct = "struct %s" % type.objstruct_cname + code.putln( + "sizeof(%s), /*tp_basicsize*/" % + objstruct) + code.putln( + "0, /*tp_itemsize*/") + for slot in TypeSlots.slot_table: + slot.generate(scope, code) + code.putln( + "};") + + def generate_method_table(self, env, code): + code.putln("") + code.putln( + "static struct PyMethodDef %s[] = {" % + env.method_table_cname) + for entry in env.pyfunc_entries: + code.put_pymethoddef(entry, ",") + code.putln( + "{0, 0, 0, 0}") + code.putln( + "};") + + def generate_member_table(self, env, code): + #print "ModuleNode.generate_member_table: scope =", env ### + if env.public_attr_entries: + code.putln("") + code.putln( + "static struct PyMemberDef %s[] = {" % + env.member_table_cname) + type = env.parent_type + if type.typedef_flag: + objstruct = type.objstruct_cname + else: + objstruct = "struct %s" % type.objstruct_cname + for entry in env.public_attr_entries: + type_code = entry.type.pymemberdef_typecode + if entry.visibility == 'readonly': + flags = "READONLY" + else: + flags = "0" + code.putln('{"%s", %s, %s, %s, 0},' % ( + entry.name, + type_code, + "offsetof(%s, %s)" % (objstruct, entry.name), + flags)) + code.putln( + "{0, 0, 0, 0, 0}") + code.putln( + "};") + + def generate_getset_table(self, env, code): + if env.property_entries: + code.putln("") + code.putln( + "static struct PyGetSetDef %s[] = {" % + env.getset_table_cname) + for entry in env.property_entries: + code.putln( + '{"%s", %s, %s, %s, 0},' % ( + entry.name, + entry.getter_cname or "0", + entry.setter_cname or "0", + entry.doc_cname or "0")) + code.putln( + "{0, 0, 0, 0, 0}") + code.putln( + "};") + + def 
generate_interned_name_table(self, env, code): + items = env.intern_map.items() + if items: + items.sort() + code.putln("") + code.putln( + "static __Pyx_InternTabEntry %s[] = {" % + Naming.intern_tab_cname) + for (name, cname) in items: + code.putln( + '{&%s, "%s"},' % ( + cname, + name)) + code.putln( + "{0, 0}") + code.putln( + "};") + + def generate_py_string_table(self, env, code): + entries = env.all_pystring_entries + if entries: + code.putln("") + code.putln( + "static __Pyx_StringTabEntry %s[] = {" % + Naming.stringtab_cname) + for entry in entries: + code.putln( + "{&%s, %s, sizeof(%s)}," % ( + entry.pystring_cname, + entry.cname, + entry.cname)) + code.putln( + "{0, 0, 0}") + code.putln( + "};") + + def generate_c_api_table(self, env, code): + public_funcs = [] + for entry in env.cfunc_entries: + if entry.visibility == 'public': + public_funcs.append(entry.cname) + if public_funcs: + env.use_utility_code(Nodes.c_api_import_code) + code.putln( + "static __Pyx_CApiTabEntry %s[] = {" % + Naming.c_api_tab_cname) + public_funcs.sort() + for entry_cname in public_funcs: + code.putln('{"%s", %s},' % (entry_cname, entry_cname)) + code.putln( + "{0, 0}") + code.putln( + "};") + + def generate_c_api_import_code(self, env, h_code): + # this is written to the header file! + h_code.put(""" + /* Return -1 and set exception on error, 0 on success. 
*/ + static int + import_%(name)s(PyObject *module) + { + if (module != NULL) { + PyObject *c_api_init = PyObject_GetAttrString( + module, "_import_c_api"); + if (!c_api_init) + return -1; + if (!PyCObject_Check(c_api_init)) + { + Py_DECREF(c_api_init); + PyErr_SetString(PyExc_RuntimeError, + "%(name)s module provided an invalid C-API reference"); + return -1; + } + + int (*init)(struct {const char *s; const void **p;}*) = + PyCObject_AsVoidPtr(c_api_init); + Py_DECREF(c_api_init); + if (!init) { + PyErr_SetString(PyExc_RuntimeError, + "%(name)s module returned NULL pointer for C-API init function"); + return -1; + } + + if (init(_%(name)s_API)) + return -1; + } + return 0; + } + """.replace('\n ', '\n') % {'name' : env.module_name}) + + def generate_c_api_init_code(self, env, code): + public_funcs = [] + for entry in env.cfunc_entries: + if entry.visibility == 'public': + public_funcs.append(entry) + if public_funcs: + code.putln('if (__Pyx_InitCApi(%s) < 0) %s' % ( + Naming.module_cname, + code.error_goto(self.pos))) + + def generate_filename_init_prototype(self, code): + code.putln(""); + code.putln("static void %s(void); /*proto*/" % Naming.fileinit_cname) + + def generate_module_init_func(self, imported_modules, env, code): + code.putln("") + header = "PyMODINIT_FUNC init%s(void)" % env.module_name + code.putln("%s; /*proto*/" % header) + code.putln("%s {" % header) + code.put_var_declarations(env.temp_entries) + #code.putln("/*--- Libary function declarations ---*/") + env.generate_library_function_declarations(code) + self.generate_filename_init_call(code) + #code.putln("/*--- Module creation code ---*/") + self.generate_module_creation_code(env, code) + #code.putln("/*--- Intern code ---*/") + self.generate_intern_code(env, code) + #code.putln("/*--- String init code ---*/") + self.generate_string_init_code(env, code) + #code.putln("/*--- External C API setup code ---*/") + self.generate_c_api_init_code(env, code) + #code.putln("/*--- Global init code 
---*/") + self.generate_global_init_code(env, code) + + #code.putln("/*--- Type init code ---*/") + self.generate_type_init_code(env, code) + + #code.putln("/*--- Type import code ---*/") + for module in imported_modules: + self.generate_type_import_code_for_module(module, env, code) + + #code.putln("/*--- Execution code ---*/") + self.body.generate_execution_code(code) + code.putln("return;") + code.put_label(code.error_label) + code.put_var_xdecrefs(env.temp_entries) + code.putln('__Pyx_AddTraceback("%s");' % (env.qualified_name)) + env.use_utility_code(Nodes.traceback_utility_code) + code.putln('}') + + def generate_filename_init_call(self, code): + code.putln("%s();" % Naming.fileinit_cname) + + def generate_module_creation_code(self, env, code): + # Generate code to create the module object and + # install the builtins. + if env.doc: + doc = env.doc_cname + else: + doc = "0" + code.putln( + '%s = Py_InitModule4("%s", %s, %s, 0, PYTHON_API_VERSION);' % ( + env.module_cname, + env.module_name, + env.method_table_cname, + doc)) + code.putln( + "if (!%s) %s;" % ( + env.module_cname, + code.error_goto(self.pos))); + code.putln( + '%s = PyImport_AddModule("__builtin__");' % + Naming.builtins_cname) + code.putln( + "if (!%s) %s;" % ( + Naming.builtins_cname, + code.error_goto(self.pos))); + code.putln( + 'if (PyObject_SetAttrString(%s, "__builtins__", %s) < 0) %s;' % ( + env.module_cname, + Naming.builtins_cname, + code.error_goto(self.pos))) + + def generate_intern_code(self, env, code): + if env.intern_map: + env.use_utility_code(Nodes.init_intern_tab_utility_code); + code.putln( + "if (__Pyx_InternStrings(%s) < 0) %s;" % ( + Naming.intern_tab_cname, + code.error_goto(self.pos))) + + def generate_string_init_code(self, env, code): + if env.all_pystring_entries: + env.use_utility_code(Nodes.init_string_tab_utility_code) + code.putln( + "if (__Pyx_InitStrings(%s) < 0) %s;" % ( + Naming.stringtab_cname, + code.error_goto(self.pos))) + + def 
generate_global_init_code(self, env, code): + # Generate code to initialise global PyObject * + # variables to None. + for entry in env.var_entries: + if entry.visibility <> 'extern': + if entry.type.is_pyobject: + code.put_init_var_to_py_none(entry) + + def generate_type_import_code_for_module(self, module, env, code): + # Generate type import code for all extension types in + # an imported module. + if module.c_class_entries: + for entry in module.c_class_entries: + self.generate_type_import_code(env, entry.type, entry.pos, code) + + def generate_type_init_code(self, env, code): + # Generate type import code for extern extension types + # and type ready code for non-extern ones. + for entry in env.c_class_entries: + if entry.visibility == 'extern': + self.generate_type_import_code(env, entry.type, entry.pos, code) + else: + self.generate_base_type_import_code(env, entry, code) + self.generate_exttype_vtable_init_code(entry, code) + self.generate_type_ready_code(env, entry, code) + self.generate_typeptr_assignment_code(entry, code) + + def generate_base_type_import_code(self, env, entry, code): + base_type = entry.type.base_type + if base_type and base_type.module_name <> env.qualified_name: + self.generate_type_import_code(env, base_type, self.pos, code) + + def use_type_import_utility_code(self, env): + import ExprNodes + env.use_utility_code(Nodes.type_import_utility_code) + env.use_utility_code(ExprNodes.import_utility_code) + + def generate_type_import_code(self, env, type, pos, code): + # If not already done, generate code to import the typeobject of an + # extension type defined in another module, and extract its C method + # table pointer if any. 
+ if type in env.types_imported: + return + if type.typedef_flag: + objstruct = type.objstruct_cname + else: + objstruct = "struct %s" % type.objstruct_cname + code.putln('%s = __Pyx_ImportType("%s", "%s", sizeof(%s)); if (!%s) %s' % ( + type.typeptr_cname, + type.module_name, + type.name, + objstruct, + type.typeptr_cname, + code.error_goto(pos))) + self.use_type_import_utility_code(env) + if type.vtabptr_cname: + code.putln( + "if (__Pyx_GetVtable(%s->tp_dict, &%s) < 0) %s" % ( + type.typeptr_cname, + type.vtabptr_cname, + code.error_goto(pos))) + env.use_utility_code(Nodes.get_vtable_utility_code) + env.types_imported[type] = 1 + + def generate_type_ready_code(self, env, entry, code): + # Generate a call to PyType_Ready for an extension + # type defined in this module. + type = entry.type + typeobj_cname = type.typeobj_cname + scope = type.scope + if scope: # could be None if there was an error + if entry.visibility <> 'extern': + for slot in TypeSlots.slot_table: + slot.generate_dynamic_init_code(scope, code) + code.putln( + "if (PyType_Ready(&%s) < 0) %s" % ( + typeobj_cname, + code.error_goto(entry.pos))) + if type.vtable_cname: + code.putln( + "if (__Pyx_SetVtable(%s.tp_dict, %s) < 0) %s" % ( + typeobj_cname, + type.vtabptr_cname, + code.error_goto(entry.pos))) + env.use_utility_code(Nodes.set_vtable_utility_code) + code.putln( + 'if (PyObject_SetAttrString(%s, "%s", (PyObject *)&%s) < 0) %s' % ( + Naming.module_cname, + scope.class_name, + typeobj_cname, + code.error_goto(entry.pos))) + weakref_entry = scope.lookup_here("__weakref__") + if weakref_entry: + if weakref_entry.type is py_object_type: + tp_weaklistoffset = "%s.tp_weaklistoffset" % typeobj_cname + code.putln("if (%s == 0) %s = offsetof(struct %s, %s);" % ( + tp_weaklistoffset, + tp_weaklistoffset, + type.objstruct_cname, + weakref_entry.cname)) + else: + error(weakref_entry.pos, "__weakref__ slot must be of type 'object'") + + def generate_exttype_vtable_init_code(self, entry, code): + # Generate 
code to initialise the C method table of an + # extension type. + type = entry.type + if type.vtable_cname: + code.putln( + "%s = &%s;" % ( + type.vtabptr_cname, + type.vtable_cname)) + if type.base_type and type.base_type.vtabptr_cname: + code.putln( + "%s.%s = *%s;" % ( + type.vtable_cname, + Naming.obj_base_cname, + type.base_type.vtabptr_cname)) + for meth_entry in type.scope.cfunc_entries: + if meth_entry.func_cname: + code.putln( + "*(void(**)())&%s.%s = (void(*)())%s;" % ( + type.vtable_cname, + meth_entry.cname, + meth_entry.func_cname)) + + def generate_typeptr_assignment_code(self, entry, code): + # Generate code to initialise the typeptr of an extension + # type defined in this module to point to its type object. + type = entry.type + if type.typeobj_cname: + code.putln( + "%s = &%s;" % ( + type.typeptr_cname, type.typeobj_cname)) + + def generate_utility_functions(self, env, code): + code.putln("") + code.putln("/* Runtime support code */") + code.putln("") + code.putln("static void %s(void) {" % Naming.fileinit_cname) + code.putln("%s = %s;" % + (Naming.filetable_cname, Naming.filenames_cname)) + code.putln("}") + for utility_code in env.utility_code_used: + code.h.put(utility_code[0]) + code.put(utility_code[1]) Modified: lxml/pyrex/Pyrex/Compiler/Nodes.py ============================================================================== --- lxml/pyrex/Pyrex/Compiler/Nodes.py (original) +++ lxml/pyrex/Pyrex/Compiler/Nodes.py Wed Feb 14 15:20:32 2007 @@ -2,7 +2,7 @@ # Pyrex - Parse tree nodes # -import os, string, sys, time +import string, sys import Code from Errors import error, InternalError @@ -11,8 +11,6 @@ from PyrexTypes import py_object_type, error_type, CTypedefType from Symtab import ModuleScope, LocalScope, \ StructOrUnionScope, PyClassScope, CClassScope -import TypeSlots -import Version from Pyrex.Utils import open_new_file, replace_suffix import Options @@ -95,1304 +93,6 @@ for entry in entries: code.putln( "static PyObject *%s;" % 
entry.pystring_cname) - - -class ModuleNode(Node, BlockNode): - # doc string or None - # body StatListNode - - def analyse_declarations(self, env): - env.doc = self.doc - self.body.analyse_declarations(env) - - def process_implementation(self, env, result): - self.analyse_declarations(env) - env.check_c_classes() - self.body.analyse_expressions(env) - env.return_type = PyrexTypes.c_void_type - self.generate_c_code(env, result) - self.generate_h_code(env, result) - - def generate_h_code(self, env, result): - public_vars = [] - public_funcs = [] - public_extension_types = [] - for entry in env.var_entries: - if entry.visibility == 'public': - public_vars.append(entry) - for entry in env.cfunc_entries: - if entry.visibility == 'public': - public_funcs.append(entry) - for entry in env.c_class_entries: - if entry.visibility == 'public': - public_extension_types.append(entry) - if public_vars or public_funcs or public_extension_types: - result.h_file = replace_suffix(result.c_file, ".h") - result.i_file = replace_suffix(result.c_file, ".pxi") - h_code = Code.CCodeWriter(result.h_file) - i_code = Code.PyrexCodeWriter(result.i_file) - header_barrier = "__HAS_PYX_" + env.module_name - h_code.putln("#ifndef %s" % header_barrier) - h_code.putln("#define %s" % header_barrier) - self.generate_extern_c_macro_definition(h_code) - for entry in public_vars: - h_code.putln("%s %s;" % ( - Naming.extern_c_macro, - entry.type.declaration_code( - entry.cname, dll_linkage = "DL_IMPORT"))) - i_code.putln("cdef extern %s" % - entry.type.declaration_code(entry.cname, pyrex = 1)) - for entry in public_extension_types: - self.generate_cclass_header_code(entry.type, h_code) - self.generate_cclass_include_code(entry.type, i_code) - if public_funcs: - for entry in public_funcs: - h_code.putln( - 'static %s;' % - entry.type.declaration_code("(*%s)" % entry.cname)) - i_code.putln("cdef extern %s" % - entry.type.declaration_code(entry.cname, pyrex = 1)) - h_code.putln( - "static struct {char *s; 
void **p;} _%s_API[] = {" % - env.module_name) - for entry in public_funcs: - h_code.putln('{"%s", &%s},' % (entry.cname, entry.cname)) - h_code.putln("{0, 0}") - h_code.putln("};") - self.generate_c_api_import_code(env, h_code) - h_code.putln("PyMODINIT_FUNC init%s(void);" % env.module_name) - h_code.putln("#endif /* %s */" % header_barrier) - - def generate_cclass_header_code(self, type, h_code): - #h_code.putln("extern DL_IMPORT(PyTypeObject) %s;" % type.typeobj_cname) - h_code.putln("%s DL_IMPORT(PyTypeObject) %s;" % ( - Naming.extern_c_macro, - type.typeobj_cname)) - self.generate_obj_struct_definition(type, h_code) - - def generate_cclass_include_code(self, type, i_code): - i_code.putln("cdef extern class %s.%s:" % ( - type.module_name, type.name)) - i_code.indent() - var_entries = type.scope.var_entries - if var_entries: - for entry in var_entries: - i_code.putln("cdef %s" % - entry.type.declaration_code(entry.cname, pyrex = 1)) - else: - i_code.putln("pass") - i_code.dedent() - - def generate_c_code(self, env, result): - modules = [] - self.find_referenced_modules(env, modules, {}) - code = Code.CCodeWriter(result.c_file) - code.init_labels() - self.generate_module_preamble(env, modules, code) - for module in modules: - self.generate_declarations_for_module(module, code, - definition = module is env) - code.putln("") - code.putln("/* Implementation of %s */" % env.qualified_name) - self.generate_const_definitions(env, code) - self.generate_interned_name_decls(env, code) - self.generate_py_string_decls(env, code) - self.body.generate_function_definitions(env, code) - self.generate_interned_name_table(env, code) - self.generate_py_string_table(env, code) - self.generate_c_api_table(env, code) - self.generate_typeobj_definitions(env, code) - self.generate_method_table(env, code) - self.generate_filename_init_prototype(code) - self.generate_module_init_func(modules[:-1], env, code) - self.generate_filename_table(code) - self.generate_utility_functions(env, 
code) - result.c_file_generated = 1 - - def find_referenced_modules(self, env, module_list, modules_seen): - if env not in modules_seen: - modules_seen[env] = 1 - for imported_module in env.cimported_modules: - self.find_referenced_modules(imported_module, module_list, modules_seen) - module_list.append(env) - - def generate_module_preamble(self, env, cimported_modules, code): - code.putln('/* Generated by Pyrex %s on %s */' % ( - Version.version, time.asctime())) - code.putln('') - code.putln('#define PY_SSIZE_T_CLEAN') - for filename in env.python_include_files: - code.putln('#include "%s"' % filename) - code.putln("#ifndef PY_LONG_LONG") - code.putln(" #define PY_LONG_LONG LONG_LONG") - code.putln("#endif") - code.putln("#if PY_VERSION_HEX < 0x02050000") - code.putln(" typedef int Py_ssize_t;") - code.putln(" #define PY_SSIZE_T_MAX INT_MAX") - code.putln(" #define PY_SSIZE_T_MIN INT_MIN") - code.putln(" #define PyInt_FromSsize_t(z) PyInt_FromLong(z)") - code.putln(" #define PyInt_AsSsize_t(o) PyInt_AsLong(o)") - code.putln("#endif") - self.generate_extern_c_macro_definition(code) - code.putln("%s double pow(double, double);" % Naming.extern_c_macro) - self.generate_includes(env, cimported_modules, code) - #for filename in env.include_files: - # code.putln('#include "%s"' % filename) - code.putln('') - code.put(utility_function_predeclarations) - if Options.intern_names: - code.putln(get_name_interned_predeclaration) - else: - code.putln(get_name_predeclaration) - code.putln('') - code.putln('static PyObject *%s;' % env.module_cname) - code.putln('static PyObject *%s;' % Naming.builtins_cname) - code.putln('static int %s;' % Naming.lineno_cname) - code.putln('static char *%s;' % Naming.filename_cname) - code.putln('static char **%s;' % Naming.filetable_cname) - if env.doc: - code.putln('') - code.putln('static char %s[] = "%s";' % (env.doc_cname, env.doc)) - - def generate_extern_c_macro_definition(self, code): - name = Naming.extern_c_macro - code.putln("#ifdef 
__cplusplus") - code.putln('#define %s extern "C"' % name) - code.putln("#else") - code.putln("#define %s extern" % name) - code.putln("#endif") - - def generate_includes(self, env, cimported_modules, code): - includes = env.include_files[:] - for module in cimported_modules: - for filename in module.include_files: - if filename not in includes: - includes.append(filename) - for filename in includes: - code.putln('#include "%s"' % filename) - - def generate_filename_table(self, code): - code.putln("") - code.putln("static char *%s[] = {" % Naming.filenames_cname) - if code.filename_list: - for filename in code.filename_list: - filename = os.path.basename(filename) - escaped_filename = filename.replace("\\", "\\\\").replace('"', r'\"') - code.putln('"%s",' % - escaped_filename) - else: - # Some C compilers don't like an empty array - code.putln("0") - code.putln("};") - - def generate_declarations_for_module(self, env, code, definition): - code.putln("") - code.putln("/* Declarations from %s */" % env.qualified_name) - self.generate_type_predeclarations(env, code) - self.generate_type_definitions(env, code) - self.generate_global_declarations(env, code, definition) - self.generate_cfunction_predeclarations(env, code) - - def generate_type_predeclarations(self, env, code): - pass - - def generate_type_definitions(self, env, code): - # Generate definitions of structs/unions/enums. - for entry in env.sue_entries: - if not entry.in_cinclude: - type = entry.type - if type.is_struct_or_union: - self.generate_struct_union_definition(entry, code) - else: - self.generate_enum_definition(entry, code) - # Generate extension type object struct definitions. 
- for entry in env.c_class_entries: - if not entry.in_cinclude: - self.generate_typeobject_predeclaration(entry, code) - self.generate_obj_struct_definition(entry.type, code) - self.generate_exttype_vtable_struct(entry, code) - self.generate_exttype_vtabptr_declaration(entry, code) - - def sue_header_footer(self, type, kind, name): - if type.typedef_flag: - header = "typedef %s {" % kind - footer = "} %s;" % name - else: - header = "%s %s {" % (kind, name) - footer = "};" - return header, footer - - def generate_struct_union_definition(self, entry, code): - type = entry.type - scope = type.scope - if scope: - header, footer = \ - self.sue_header_footer(type, type.kind, type.cname) - code.putln("") - code.putln(header) - var_entries = scope.var_entries - if not var_entries: - error(entry.pos, - "Empty struct or union definition not allowed outside a" - " 'cdef extern from' block") - for attr in var_entries: - code.putln( - "%s;" % - attr.type.declaration_code(attr.cname)) - code.putln(footer) - - def generate_enum_definition(self, entry, code): - type = entry.type - name = entry.cname or entry.name or "" - header, footer = \ - self.sue_header_footer(type, "enum", name) - code.putln("") - code.putln(header) - enum_values = entry.enum_values - if not enum_values: - error(entry.pos, - "Empty enum definition not allowed outside a" - " 'cdef extern from' block") - for value_entry in enum_values: - if value_entry.value == value_entry.name: - code.putln( - "%s," % - value_entry.cname) - else: - code.putln( - "%s = %s," % ( - value_entry.cname, - value_entry.value)) - code.putln(footer) - - def generate_typeobject_predeclaration(self, entry, code): - code.putln("") - name = entry.type.typeobj_cname - if name: - if entry.visibility == 'extern' and not entry.in_cinclude: - code.putln("%s DL_IMPORT(PyTypeObject) %s;" % ( - Naming.extern_c_macro, - name)) - elif entry.visibility == 'public': - #code.putln("DL_EXPORT(PyTypeObject) %s;" % name) - code.putln("%s 
DL_EXPORT(PyTypeObject) %s;" % ( - Naming.extern_c_macro, - name)) - # ??? Do we really need the rest of this? ??? - #else: - # code.putln("staticforward PyTypeObject %s;" % name) - - def generate_exttype_vtable_struct(self, entry, code): - # Generate struct declaration for an extension type's vtable. - type = entry.type - scope = type.scope - if type.vtabstruct_cname: - code.putln("") - code.putln( - "struct %s {" % - type.vtabstruct_cname) - if type.base_type and type.base_type.vtabstruct_cname: - code.putln("struct %s %s;" % ( - type.base_type.vtabstruct_cname, - Naming.obj_base_cname)) - for method_entry in scope.cfunc_entries: - if not method_entry.is_inherited: - code.putln( - "%s;" % method_entry.type.declaration_code("(*%s)" % method_entry.name)) - code.putln( - "};") - - def generate_exttype_vtabptr_declaration(self, entry, code): - # Generate declaration of pointer to an extension type's vtable. - type = entry.type - if type.vtabptr_cname: - code.putln("static struct %s *%s;" % ( - type.vtabstruct_cname, - type.vtabptr_cname)) - - def generate_obj_struct_definition(self, type, code): - # Generate object struct definition for an - # extension type. 
- if not type.scope: - return # Forward declared but never defined - header, footer = \ - self.sue_header_footer(type, "struct", type.objstruct_cname) - code.putln("") - code.putln(header) - base_type = type.base_type - if base_type: - code.putln( - "%s%s %s;" % ( - ("struct ", "")[base_type.typedef_flag], - base_type.objstruct_cname, - Naming.obj_base_cname)) - else: - code.putln( - "PyObject_HEAD") - if type.vtabslot_cname and not (type.base_type and type.base_type.vtabslot_cname): - code.putln( - "struct %s *%s;" % ( - type.vtabstruct_cname, - type.vtabslot_cname)) - for attr in type.scope.var_entries: - code.putln( - "%s;" % - attr.type.declaration_code(attr.cname)) - code.putln(footer) - - def generate_global_declarations(self, env, code, definition): - code.putln("") - for entry in env.c_class_entries: - code.putln("static PyTypeObject *%s = 0;" % - entry.type.typeptr_cname) - code.put_var_declarations(env.var_entries, static = 1, - dll_linkage = "DL_EXPORT", definition = definition) - code.put_var_declarations(env.default_entries, static = 1) - - def generate_cfunction_predeclarations(self, env, code): - for entry in env.cfunc_entries: - if not entry.in_cinclude: - if entry.visibility == 'public': - dll_linkage = "DL_EXPORT" - else: - dll_linkage = None - header = entry.type.declaration_code(entry.cname, - dll_linkage = dll_linkage) - if entry.visibility == 'private': - storage_class = "static " - elif entry.visibility == 'extern': - storage_class = "%s " % Naming.extern_c_macro - else: - storage_class = "" - code.putln("%s%s; /*proto*/" % ( - storage_class, - header)) - - def generate_typeobj_definitions(self, env, code): - full_module_name = env.qualified_name - for entry in env.c_class_entries: - #print "generate_typeobj_definitions:", entry.name - #print "...visibility =", entry.visibility - if entry.visibility <> 'extern': - type = entry.type - scope = type.scope - if scope: # could be None if there was an error - self.generate_exttype_vtable(scope, 
code) - self.generate_new_function(scope, code) - self.generate_dealloc_function(scope, code) - self.generate_traverse_function(scope, code) - self.generate_clear_function(scope, code) - if scope.defines_any(["__getitem__"]): - self.generate_getitem_int_function(scope, code) - if scope.defines_any(["__setitem__", "__delitem__"]): - self.generate_ass_subscript_function(scope, code) - if scope.defines_any(["__setslice__", "__delslice__"]): - self.generate_ass_slice_function(scope, code) - if scope.defines_any(["__getattr__"]): - self.generate_getattro_function(scope, code) - if scope.defines_any(["__setattr__", "__delattr__"]): - self.generate_setattro_function(scope, code) - if scope.defines_any(["__get__"]): - self.generate_descr_get_function(scope, code) - if scope.defines_any(["__set__", "__delete__"]): - self.generate_descr_set_function(scope, code) - self.generate_property_accessors(scope, code) - self.generate_method_table(scope, code) - self.generate_member_table(scope, code) - self.generate_getset_table(scope, code) - self.generate_typeobj_definition(full_module_name, entry, code) - - def generate_exttype_vtable(self, scope, code): - # Generate the definition of an extension type's vtable. 
- type = scope.parent_type - if type.vtable_cname: - code.putln("static struct %s %s;" % ( - type.vtabstruct_cname, - type.vtable_cname)) - - def generate_self_cast(self, scope, code): - type = scope.parent_type - code.putln( - "%s = (%s)o;" % ( - type.declaration_code("p"), - type.declaration_code(""))) - - def generate_new_function(self, scope, code): - base_type = scope.parent_type.base_type - code.putln("") - code.putln( - "static PyObject *%s(PyTypeObject *t, PyObject *a, PyObject *k) {" - % scope.mangle_internal("tp_new")) - if base_type: - code.putln( - "PyObject *o = %s->tp_new(t, a, k);" % - base_type.typeptr_cname) - else: - code.putln( - "PyObject *o = (*t->tp_alloc)(t, 0);") - self.generate_self_cast(scope, code) - type = scope.parent_type - if type.vtabslot_cname: - code.putln("*(struct %s **)&p->%s = %s;" % ( - type.vtabstruct_cname, - type.vtabslot_cname, - type.vtabptr_cname)) - for entry in scope.var_entries: - if entry.type.is_pyobject: - code.put_init_var_to_py_none(entry, "p->%s") - entry = scope.lookup_here("__new__") - if entry: - code.putln( - "if (%s(o, a, k) < 0) {" % - entry.func_cname) - code.put_decref_clear("o", py_object_type); - code.putln( - "}") - code.putln( - "return o;") - code.putln( - "}") - - def generate_dealloc_function(self, scope, code): - base_type = scope.parent_type.base_type - code.putln("") - code.putln( - "static void %s(PyObject *o) {" - % scope.mangle_internal("tp_dealloc")) - self.generate_self_cast(scope, code) - self.generate_usr_dealloc_call(scope, code) - for entry in scope.var_entries: - if entry.type.is_pyobject: - code.put_xdecref("p->%s" % entry.cname, entry.type) - if base_type: - code.putln( - "%s->tp_dealloc(o);" % - base_type.typeptr_cname) - else: - code.putln( - "(*o->ob_type->tp_free)(o);") - code.putln( - "}") - - def generate_usr_dealloc_call(self, scope, code): - entry = scope.lookup_here("__dealloc__") - if entry: - code.putln( - "{") - code.putln( - "PyObject *etype, *eval, *etb;") - 
code.putln( - "PyErr_Fetch(&etype, &eval, &etb);") - code.putln( - "++o->ob_refcnt;") - code.putln( - "%s(o);" % - entry.func_cname) - code.putln( - "if (PyErr_Occurred()) PyErr_WriteUnraisable(o);") - code.putln( - "--o->ob_refcnt;") - code.putln( - "PyErr_Restore(etype, eval, etb);") - code.putln( - "}") - - def generate_traverse_function(self, scope, code): - base_type = scope.parent_type.base_type - code.putln("") - code.putln( - "static int %s(PyObject *o, visitproc v, void *a) {" - % scope.mangle_internal("tp_traverse")) - code.putln( - "int e;") - self.generate_self_cast(scope, code) - if base_type: - code.putln( - "e = %s->tp_traverse(o, v, a); if (e) return e;" % - base_type.typeptr_cname) - for entry in scope.var_entries: - if entry.type.is_pyobject: - var_code = "p->%s" % entry.cname - code.putln( - "if (%s) {" - % var_code) - if entry.type.is_extension_type: - var_code = "((PyObject*)%s)" % var_code - code.putln( - "e = (*v)(%s, a); if (e) return e;" - % var_code) - code.putln( - "}") - code.putln( - "return 0;") - code.putln( - "}") - - def generate_clear_function(self, scope, code): - base_type = scope.parent_type.base_type - code.putln("") - code.putln( - "static int %s(PyObject *o) {" - % scope.mangle_internal("tp_clear")) - self.generate_self_cast(scope, code) - if base_type: - code.putln( - "%s->tp_clear(o);" % - base_type.typeptr_cname) - for entry in scope.var_entries: - if entry.type.is_pyobject: - name = "p->%s" % entry.cname - code.put_xdecref(name, entry.type) - #code.put_init_to_py_none(name) - code.put_init_var_to_py_none(entry, "p->%s") - code.putln( - "return 0;") - code.putln( - "}") - - def generate_getitem_int_function(self, scope, code): - # This function is put into the sq_item slot when - # a __getitem__ method is present. It converts its - # argument to a Python integer and calls mp_subscript. 
- code.putln( - "static PyObject *%s(PyObject *o, Py_ssize_t i) {" % - scope.mangle_internal("sq_item")) - code.putln( - "PyObject *r;") - code.putln( - "PyObject *x = PyInt_FromSsize_t(i); if(!x) return 0;") - code.putln( - "r = o->ob_type->tp_as_mapping->mp_subscript(o, x);") - code.putln( - "Py_DECREF(x);") - code.putln( - "return r;") - code.putln( - "}") - - def generate_ass_subscript_function(self, scope, code): - # Setting and deleting an item are both done through - # the ass_subscript method, so we dispatch to user's __setitem__ - # or __delitem__, or raise an exception. - base_type = scope.parent_type.base_type - set_entry = scope.lookup_here("__setitem__") - del_entry = scope.lookup_here("__delitem__") - code.putln("") - code.putln( - "static int %s(PyObject *o, PyObject *i, PyObject *v) {" % - scope.mangle_internal("mp_ass_subscript")) - code.putln( - "if (v) {") - if set_entry: - code.putln( - "return %s(o, i, v);" % - set_entry.func_cname) - else: - self.generate_guarded_basetype_call( - base_type, "tp_as_mapping", "mp_ass_subscript", "o, i, v", code) - code.putln( - "PyErr_Format(PyExc_NotImplementedError,") - code.putln( - ' "Subscript assignment not supported by %s", o->ob_type->tp_name);') - code.putln( - "return -1;") - code.putln( - "}") - code.putln( - "else {") - if del_entry: - code.putln( - "return %s(o, i);" % - del_entry.func_cname) - else: - self.generate_guarded_basetype_call( - base_type, "tp_as_mapping", "mp_ass_subscript", "o, i, v", code) - code.putln( - "PyErr_Format(PyExc_NotImplementedError,") - code.putln( - ' "Subscript deletion not supported by %s", o->ob_type->tp_name);') - code.putln( - "return -1;") - code.putln( - "}") - code.putln( - "}") - - def generate_guarded_basetype_call( - self, base_type, substructure, slot, args, code): - if base_type: - base_tpname = base_type.typeptr_cname - if substructure: - code.putln( - "if (%s->%s && %s->%s->%s)" % ( - base_tpname, substructure, base_tpname, substructure, slot)) - 
code.putln( - " return %s->%s->%s(%s);" % ( - base_tpname, substructure, slot, args)) - else: - code.putln( - "if (%s->%s)" % ( - base_tpname, slot)) - code.putln( - " return %s->%s(%s);" % ( - base_tpname, slot, args)) - - def generate_ass_slice_function(self, scope, code): - # Setting and deleting a slice are both done through - # the ass_slice method, so we dispatch to user's __setslice__ - # or __delslice__, or raise an exception. - base_type = scope.parent_type.base_type - set_entry = scope.lookup_here("__setslice__") - del_entry = scope.lookup_here("__delslice__") - code.putln("") - code.putln( - "static int %s(PyObject *o, Py_ssize_t i, Py_ssize_t j, PyObject *v) {" % - scope.mangle_internal("sq_ass_slice")) - code.putln( - "if (v) {") - if set_entry: - code.putln( - "return %s(o, i, j, v);" % - set_entry.func_cname) - else: - self.generate_guarded_basetype_call( - base_type, "tp_as_sequence", "sq_ass_slice", "o, i, j, v", code) - code.putln( - "PyErr_Format(PyExc_NotImplementedError,") - code.putln( - ' "2-element slice assignment not supported by %s", o->ob_type->tp_name);') - code.putln( - "return -1;") - code.putln( - "}") - code.putln( - "else {") - if del_entry: - code.putln( - "return %s(o, i, j);" % - del_entry.func_cname) - else: - self.generate_guarded_basetype_call( - base_type, "tp_as_sequence", "sq_ass_slice", "o, i, j, v", code) - code.putln( - "PyErr_Format(PyExc_NotImplementedError,") - code.putln( - ' "2-element slice deletion not supported by %s", o->ob_type->tp_name);') - code.putln( - "return -1;") - code.putln( - "}") - code.putln( - "}") - - def generate_getattro_function(self, scope, code): - # First try to get the attribute using PyObject_GenericGetAttr. - # If that raises an AttributeError, call the user's __getattr__ - # method. 
- entry = scope.lookup_here("__getattr__") - code.putln("") - code.putln( - "static PyObject *%s(PyObject *o, PyObject *n) {" - % scope.mangle_internal("tp_getattro")) - code.putln( - "PyObject *v = PyObject_GenericGetAttr(o, n);") - code.putln( - "if (!v && PyErr_ExceptionMatches(PyExc_AttributeError)) {") - code.putln( - "PyErr_Clear();") - code.putln( - "v = %s(o, n);" % - entry.func_cname) - code.putln( - "}") - code.putln( - "return v;") - code.putln( - "}") - - def generate_setattro_function(self, scope, code): - # Setting and deleting an attribute are both done through - # the setattro method, so we dispatch to user's __setattr__ - # or __delattr__ or fall back on PyObject_GenericSetAttr. - base_type = scope.parent_type.base_type - set_entry = scope.lookup_here("__setattr__") - del_entry = scope.lookup_here("__delattr__") - code.putln("") - code.putln( - "static int %s(PyObject *o, PyObject *n, PyObject *v) {" % - scope.mangle_internal("tp_setattro")) - code.putln( - "if (v) {") - if set_entry: - code.putln( - "return %s(o, n, v);" % - set_entry.func_cname) - else: - self.generate_guarded_basetype_call( - base_type, None, "tp_setattro", "o, n, v", code) - code.putln( - "return PyObject_GenericSetAttr(o, n, v);") - code.putln( - "}") - code.putln( - "else {") - if del_entry: - code.putln( - "return %s(o, n);" % - del_entry.func_cname) - else: - self.generate_guarded_basetype_call( - base_type, None, "tp_setattro", "o, n, v", code) - code.putln( - "return PyObject_GenericSetAttr(o, n, 0);") - code.putln( - "}") - code.putln( - "}") - - def generate_descr_get_function(self, scope, code): - # The __get__ function of a descriptor object can be - # called with NULL for the second or third arguments - # under some circumstances, so we replace them with - # None in that case. 
- user_get_entry = scope.lookup_here("__get__") - code.putln("") - code.putln( - "static PyObject *%s(PyObject *o, PyObject *i, PyObject *c) {" % - scope.mangle_internal("tp_descr_get")) - code.putln( - "PyObject *r = 0;") - code.putln( - "if (!i) i = Py_None;") - code.putln( - "if (!c) c = Py_None;") - #code.put_incref("i", py_object_type) - #code.put_incref("c", py_object_type) - code.putln( - "r = %s(o, i, c);" % - user_get_entry.func_cname) - #code.put_decref("i", py_object_type) - #code.put_decref("c", py_object_type) - code.putln( - "return r;") - code.putln( - "}") - - def generate_descr_set_function(self, scope, code): - # Setting and deleting are both done through the __set__ - # method of a descriptor, so we dispatch to user's __set__ - # or __delete__ or raise an exception. - base_type = scope.parent_type.base_type - user_set_entry = scope.lookup_here("__set__") - user_del_entry = scope.lookup_here("__delete__") - code.putln("") - code.putln( - "static int %s(PyObject *o, PyObject *i, PyObject *v) {" % - scope.mangle_internal("tp_descr_set")) - code.putln( - "if (v) {") - if user_set_entry: - code.putln( - "return %s(o, i, v);" % - user_set_entry.func_cname) - else: - self.generate_guarded_basetype_call( - base_type, None, "tp_descr_set", "o, i, v", code) - code.putln( - 'PyErr_SetString(PyExc_NotImplementedError, "__set__");') - code.putln( - "return -1;") - code.putln( - "}") - code.putln( - "else {") - if user_del_entry: - code.putln( - "return %s(o, i);" % - user_del_entry.func_cname) - else: - self.generate_guarded_basetype_call( - base_type, None, "tp_descr_set", "o, i, v", code) - code.putln( - 'PyErr_SetString(PyExc_NotImplementedError, "__delete__");') - code.putln( - "return -1;") - code.putln( - "}") - code.putln( - "}") - - def generate_property_accessors(self, cclass_scope, code): - for entry in cclass_scope.property_entries: - property_scope = entry.scope - if property_scope.defines_any(["__get__"]): - 
self.generate_property_get_function(entry, code) - if property_scope.defines_any(["__set__", "__del__"]): - self.generate_property_set_function(entry, code) - - def generate_property_get_function(self, property_entry, code): - property_scope = property_entry.scope - property_entry.getter_cname = property_scope.parent_scope.mangle( - Naming.prop_get_prefix, property_entry.name) - get_entry = property_scope.lookup_here("__get__") - code.putln("") - code.putln( - "static PyObject *%s(PyObject *o, void *x) {" % - property_entry.getter_cname) - code.putln( - "return %s(o);" % - get_entry.func_cname) - code.putln( - "}") - - def generate_property_set_function(self, property_entry, code): - property_scope = property_entry.scope - property_entry.setter_cname = property_scope.parent_scope.mangle( - Naming.prop_set_prefix, property_entry.name) - set_entry = property_scope.lookup_here("__set__") - del_entry = property_scope.lookup_here("__del__") - code.putln("") - code.putln( - "static int %s(PyObject *o, PyObject *v, void *x) {" % - property_entry.setter_cname) - code.putln( - "if (v) {") - if set_entry: - code.putln( - "return %s(o, v);" % - set_entry.func_cname) - else: - code.putln( - 'PyErr_SetString(PyExc_NotImplementedError, "__set__");') - code.putln( - "return -1;") - code.putln( - "}") - code.putln( - "else {") - if del_entry: - code.putln( - "return %s(o);" % - del_entry.func_cname) - else: - code.putln( - 'PyErr_SetString(PyExc_NotImplementedError, "__del__");') - code.putln( - "return -1;") - code.putln( - "}") - code.putln( - "}") - - def generate_typeobj_definition(self, modname, entry, code): - type = entry.type - scope = type.scope - for suite in TypeSlots.substructures: - suite.generate_substructure(scope, code) - code.putln("") - if entry.visibility == 'public': - header = "DL_EXPORT(PyTypeObject) %s = {" - else: - #header = "statichere PyTypeObject %s = {" - header = "PyTypeObject %s = {" - #code.putln(header % scope.parent_type.typeobj_cname) - 
code.putln(header % type.typeobj_cname) - code.putln( - "PyObject_HEAD_INIT(0)") - code.putln( - "0, /*ob_size*/") - code.putln( - '"%s.%s", /*tp_name*/' % ( - modname, scope.class_name)) - if type.typedef_flag: - objstruct = type.objstruct_cname - else: - #objstruct = "struct %s" % scope.parent_type.objstruct_cname - objstruct = "struct %s" % type.objstruct_cname - code.putln( - "sizeof(%s), /*tp_basicsize*/" % - objstruct) - code.putln( - "0, /*tp_itemsize*/") - for slot in TypeSlots.slot_table: - slot.generate(scope, code) - code.putln( - "};") - - def generate_method_table(self, env, code): - code.putln("") - code.putln( - "static struct PyMethodDef %s[] = {" % - env.method_table_cname) - for entry in env.pyfunc_entries: - code.put_pymethoddef(entry, ",") - code.putln( - "{0, 0, 0, 0}") - code.putln( - "};") - - def generate_member_table(self, env, code): - #print "ModuleNode.generate_member_table: scope =", env ### - if env.public_attr_entries: - code.putln("") - code.putln( - "static struct PyMemberDef %s[] = {" % - env.member_table_cname) - type = env.parent_type - if type.typedef_flag: - objstruct = type.objstruct_cname - else: - objstruct = "struct %s" % type.objstruct_cname - for entry in env.public_attr_entries: - type_code = entry.type.pymemberdef_typecode - if entry.visibility == 'readonly': - flags = "READONLY" - else: - flags = "0" - code.putln('{"%s", %s, %s, %s, 0},' % ( - entry.name, - type_code, - "offsetof(%s, %s)" % (objstruct, entry.name), - flags)) - code.putln( - "{0, 0, 0, 0, 0}") - code.putln( - "};") - - def generate_getset_table(self, env, code): - if env.property_entries: - code.putln("") - code.putln( - "static struct PyGetSetDef %s[] = {" % - env.getset_table_cname) - for entry in env.property_entries: - code.putln( - '{"%s", %s, %s, %s, 0},' % ( - entry.name, - entry.getter_cname or "0", - entry.setter_cname or "0", - entry.doc_cname or "0")) - code.putln( - "{0, 0, 0, 0, 0}") - code.putln( - "};") - - def 
generate_interned_name_table(self, env, code): - items = env.intern_map.items() - if items: - items.sort() - code.putln("") - code.putln( - "static __Pyx_InternTabEntry %s[] = {" % - Naming.intern_tab_cname) - for (name, cname) in items: - code.putln( - '{&%s, "%s"},' % ( - cname, - name)) - code.putln( - "{0, 0}") - code.putln( - "};") - - def generate_py_string_table(self, env, code): - entries = env.all_pystring_entries - if entries: - code.putln("") - code.putln( - "static __Pyx_StringTabEntry %s[] = {" % - Naming.stringtab_cname) - for entry in entries: - code.putln( - "{&%s, %s, sizeof(%s)}," % ( - entry.pystring_cname, - entry.cname, - entry.cname)) - code.putln( - "{0, 0, 0}") - code.putln( - "};") - - def generate_c_api_table(self, env, code): - public_funcs = [] - for entry in env.cfunc_entries: - if entry.visibility == 'public': - public_funcs.append(entry.cname) - if public_funcs: - env.use_utility_code(c_api_import_code); - code.putln( - "static __Pyx_CApiTabEntry %s[] = {" % - Naming.c_api_tab_cname) - public_funcs.sort() - for entry_cname in public_funcs: - code.putln('{"%s", %s},' % (entry_cname, entry_cname)) - code.putln( - "{0, 0}") - code.putln( - "};") - - def generate_c_api_import_code(self, env, h_code): - # this is written to the header file! - h_code.put(""" - /* Return -1 and set exception on error, 0 on success. 
*/ - static int - import_%(name)s(PyObject *module) - { - if (module != NULL) { - PyObject *c_api_init = PyObject_GetAttrString( - module, "_import_c_api"); - if (!c_api_init) - return -1; - if (PyCObject_Check(c_api_init)) - { - int (*init)(struct {const char *s; const void **p;}*) = - PyCObject_AsVoidPtr(c_api_init); - if (!init) { - PyErr_SetString(PyExc_RuntimeError, - "module returns NULL pointer for C API call"); - return -1; - } - init(_%(name)s_API); - } - Py_DECREF(c_api_init); - } - return 0; - } - """.replace('\n ', '\n') % {'name' : env.module_name}) - - def generate_c_api_init_code(self, env, code): - public_funcs = [] - for entry in env.cfunc_entries: - if entry.visibility == 'public': - public_funcs.append(entry) - if public_funcs: - code.putln('if (__Pyx_InitCApi(%s) < 0) %s' % ( - Naming.module_cname, - code.error_goto(self.pos))) - - def generate_filename_init_prototype(self, code): - code.putln(""); - code.putln("static void %s(void); /*proto*/" % Naming.fileinit_cname) - - def generate_module_init_func(self, imported_modules, env, code): - code.putln("") - header = "PyMODINIT_FUNC init%s(void)" % env.module_name - code.putln("%s; /*proto*/" % header) - code.putln("%s {" % header) - code.put_var_declarations(env.temp_entries) - #code.putln("/*--- Libary function declarations ---*/") - env.generate_library_function_declarations(code) - self.generate_filename_init_call(code) - #code.putln("/*--- Module creation code ---*/") - self.generate_module_creation_code(env, code) - #code.putln("/*--- Intern code ---*/") - self.generate_intern_code(env, code) - #code.putln("/*--- String init code ---*/") - self.generate_string_init_code(env, code) - #code.putln("/*--- External C API setup code ---*/") - self.generate_c_api_init_code(env, code) - #code.putln("/*--- Global init code ---*/") - self.generate_global_init_code(env, code) - #code.putln("/*--- Type import code ---*/") - for module in imported_modules: - 
self.generate_type_import_code_for_module(module, env, code) - #code.putln("/*--- Type init code ---*/") - self.generate_type_init_code(env, code) - #code.putln("/*--- Execution code ---*/") - self.body.generate_execution_code(code) - code.putln("return;") - code.put_label(code.error_label) - code.put_var_xdecrefs(env.temp_entries) - code.putln('__Pyx_AddTraceback("%s");' % (env.qualified_name)) - env.use_utility_code(traceback_utility_code) - code.putln('}') - - def generate_filename_init_call(self, code): - code.putln("%s();" % Naming.fileinit_cname) - - def generate_module_creation_code(self, env, code): - # Generate code to create the module object and - # install the builtins. - if env.doc: - doc = env.doc_cname - else: - doc = "0" - code.putln( - '%s = Py_InitModule4("%s", %s, %s, 0, PYTHON_API_VERSION);' % ( - env.module_cname, - env.module_name, - env.method_table_cname, - doc)) - code.putln( - "if (!%s) %s;" % ( - env.module_cname, - code.error_goto(self.pos))); - code.putln( - '%s = PyImport_AddModule("__builtin__");' % - Naming.builtins_cname) - code.putln( - "if (!%s) %s;" % ( - Naming.builtins_cname, - code.error_goto(self.pos))); - code.putln( - 'if (PyObject_SetAttrString(%s, "__builtins__", %s) < 0) %s;' % ( - env.module_cname, - Naming.builtins_cname, - code.error_goto(self.pos))) - - def generate_intern_code(self, env, code): - if env.intern_map: - env.use_utility_code(init_intern_tab_utility_code); - code.putln( - "if (__Pyx_InternStrings(%s) < 0) %s;" % ( - Naming.intern_tab_cname, - code.error_goto(self.pos))) - - def generate_string_init_code(self, env, code): - if env.all_pystring_entries: - env.use_utility_code(init_string_tab_utility_code) - code.putln( - "if (__Pyx_InitStrings(%s) < 0) %s;" % ( - Naming.stringtab_cname, - code.error_goto(self.pos))) - - def generate_global_init_code(self, env, code): - # Generate code to initialise global PyObject * - # variables to None. 
- for entry in env.var_entries: - if entry.visibility <> 'extern': - if entry.type.is_pyobject: - code.put_init_var_to_py_none(entry) - - def generate_type_import_code_for_module(self, module, env, code): - # Generate type import code for all extension types in - # an imported module. - if module.c_class_entries: - for entry in module.c_class_entries: - self.generate_type_import_code(env, entry, code) - - def generate_type_init_code(self, env, code): - # Generate type import code for extern extension types - # and type ready code for non-extern ones. - for entry in env.c_class_entries: - if entry.visibility == 'extern': - self.generate_type_import_code(env, entry, code) - else: - self.generate_exttype_vtable_init_code(entry, code) - self.generate_type_ready_code(env, entry, code) - self.generate_typeptr_assignment_code(entry, code) - - def use_type_import_utility_code(self, env): - import ExprNodes - env.use_utility_code(type_import_utility_code) - env.use_utility_code(ExprNodes.import_utility_code) - - def generate_type_import_code(self, env, entry, code): - # Generate code to import the typeobject of an - # extension type defined in another module, and - # extract its C method table pointer if any. - type = entry.type - if type.typedef_flag: - objstruct = type.objstruct_cname - else: - objstruct = "struct %s" % type.objstruct_cname - code.putln('%s = __Pyx_ImportType("%s", "%s", sizeof(%s)); if (!%s) %s' % ( - type.typeptr_cname, - type.module_name, - type.name, - objstruct, - type.typeptr_cname, - code.error_goto(entry.pos))) - self.use_type_import_utility_code(env) - if type.vtabptr_cname: - code.putln( - "if (__Pyx_GetVtable(%s->tp_dict, &%s) < 0) %s" % ( - type.typeptr_cname, - type.vtabptr_cname, - code.error_goto(entry.pos))) - env.use_utility_code(get_vtable_utility_code) - - def generate_type_ready_code(self, env, entry, code): - # Generate a call to PyType_Ready for an extension - # type defined in this module. 
- type = entry.type - typeobj_cname = type.typeobj_cname - scope = type.scope - if scope: # could be None if there was an error - if entry.visibility <> 'extern': - for slot in TypeSlots.slot_table: - slot.generate_dynamic_init_code(scope, code) - code.putln( - "if (PyType_Ready(&%s) < 0) %s" % ( - typeobj_cname, - code.error_goto(entry.pos))) - if type.vtable_cname: - code.putln( - "if (__Pyx_SetVtable(%s.tp_dict, %s) < 0) %s" % ( - typeobj_cname, - type.vtabptr_cname, - code.error_goto(entry.pos))) - env.use_utility_code(set_vtable_utility_code) - code.putln( - 'if (PyObject_SetAttrString(%s, "%s", (PyObject *)&%s) < 0) %s' % ( - Naming.module_cname, - scope.class_name, - typeobj_cname, - code.error_goto(entry.pos))) - weakref_entry = scope.lookup_here("__weakref__") - if weakref_entry: - if weakref_entry.type is py_object_type: - tp_weaklistoffset = "%s.tp_weaklistoffset" % typeobj_cname - code.putln("if (%s == 0) %s = offsetof(struct %s, %s);" % ( - tp_weaklistoffset, - tp_weaklistoffset, - type.objstruct_cname, - weakref_entry.cname)) - else: - error(weakref_entry.pos, "__weakref__ slot must be of type 'object'") - - def generate_exttype_vtable_init_code(self, entry, code): - # Generate code to initialise the C method table of an - # extension type. - type = entry.type - if type.vtable_cname: - code.putln( - "%s = &%s;" % ( - type.vtabptr_cname, - type.vtable_cname)) - if type.base_type and type.base_type.vtabptr_cname: - code.putln( - "%s.%s = *%s;" % ( - type.vtable_cname, - Naming.obj_base_cname, - type.base_type.vtabptr_cname)) - for meth_entry in type.scope.cfunc_entries: - if meth_entry.func_cname: - code.putln( - "*(void **)&%s.%s = (void *)%s;" % ( - type.vtable_cname, - meth_entry.cname, - meth_entry.func_cname)) - - def generate_typeptr_assignment_code(self, entry, code): - # Generate code to initialise the typeptr of an extension - # type defined in this module to point to its type object. 
- type = entry.type - if type.typeobj_cname: - code.putln( - "%s = &%s;" % ( - type.typeptr_cname, type.typeobj_cname)) - - def generate_utility_functions(self, env, code): - code.putln("") - code.putln("/* Runtime support code */") - code.putln("") - code.putln("static void %s(void) {" % Naming.fileinit_cname) - code.putln("%s = %s;" % - (Naming.filetable_cname, Naming.filenames_cname)) - code.putln("}") - for utility_code in env.utility_code_used: - code.put(utility_code) class StatListNode(Node): @@ -1787,8 +487,6 @@ # ----- Top-level constants used by this function self.generate_interned_name_decls(lenv, code) self.generate_py_string_decls(lenv, code) - #code.putln("") - #code.put_var_declarations(lenv.const_entries, static = 1) self.generate_const_definitions(lenv, code) # ----- Function header code.putln("") @@ -1811,13 +509,12 @@ # ----- Fetch arguments self.generate_argument_parsing_code(code) self.generate_argument_increfs(lenv, code) - #self.generate_stararg_getting_code(code) - self.generate_argument_conversion_code(code) # ----- Initialise local variables for entry in lenv.var_entries: - if entry.type.is_pyobject and entry.init_to_none: + if entry.type.is_pyobject and entry.init_to_none and entry.used: code.put_init_var_to_py_none(entry) - # ----- Check types of arguments + # ----- Check and convert arguments + self.generate_argument_conversion_code(code) self.generate_argument_type_tests(code) # ----- Function body self.body.generate_execution_code(code) @@ -1833,29 +530,31 @@ val = self.return_type.default_value if val: code.putln("%s = %s;" % (Naming.retval_cname, val)) - code.putln("goto %s;" % code.return_label) + #code.putln("goto %s;" % code.return_label) # ----- Error cleanup - code.put_label(code.error_label) - code.put_var_xdecrefs(lenv.temp_entries) - err_val = self.error_value() - exc_check = self.caller_will_check_exceptions() - if err_val is not None or exc_check: - code.putln( - '__Pyx_AddTraceback("%s");' % - self.entry.qualified_name) - 
if err_val is not None: + if code.error_label in code.labels_used: + code.put_goto(code.return_label) + code.put_label(code.error_label) + code.put_var_xdecrefs(lenv.temp_entries) + err_val = self.error_value() + exc_check = self.caller_will_check_exceptions() + if err_val is not None or exc_check: + code.putln( + '__Pyx_AddTraceback("%s");' % + self.entry.qualified_name) + if err_val is not None: + code.putln( + "%s = %s;" % ( + Naming.retval_cname, + err_val)) + else: code.putln( - "%s = %s;" % ( - Naming.retval_cname, - err_val)) - else: - code.putln( - '__Pyx_WriteUnraisable("%s");' % - self.entry.qualified_name) - env.use_utility_code(unraisable_exception_utility_code) + '__Pyx_WriteUnraisable("%s");' % + self.entry.qualified_name) + env.use_utility_code(unraisable_exception_utility_code) # ----- Return cleanup code.put_label(code.return_label) - code.put_var_decrefs(lenv.var_entries) + code.put_var_decrefs(lenv.var_entries, used_only = 1) code.put_var_decrefs(lenv.arg_entries) self.put_stararg_decrefs(code) if not self.return_type.is_void: @@ -1964,9 +663,6 @@ def generate_argument_parsing_code(self, code): pass -# def generate_stararg_getting_code(self, code): -# pass - def generate_argument_conversion_code(self, code): pass @@ -2107,6 +803,7 @@ arg.entry.init_to_none = 0 else: arg.entry = self.declare_argument(env, arg) + arg.entry.used = 1 arg.entry.is_self_arg = arg.is_self_arg if arg.hdr_type: if arg.is_self_arg or \ @@ -2117,11 +814,13 @@ def declare_python_arg(self, env, arg): if arg: - arg.entry = env.declare_var(arg.name, + entry = env.declare_var(arg.name, PyrexTypes.py_object_type, arg.pos) - arg.entry.init = "0" - arg.entry.init_to_none = 0 - arg.entry.xdecref_cleanup = 1 + entry.used = 1 + entry.init = "0" + entry.init_to_none = 0 + entry.xdecref_cleanup = 1 + arg.entry = entry def analyse_expressions(self, env): self.analyse_default_values(env) @@ -2136,6 +835,7 @@ arg.default = arg.default.coerce_to(arg.type, env) 
arg.default.allocate_temps(env) arg.default_entry = env.add_default_value(arg.type) + arg.default_entry.used = 1 else: error(arg.pos, "This argument cannot have a default value") @@ -2431,11 +1131,10 @@ self.scope = cenv self.body.analyse_declarations(cenv) self.body.analyse_expressions(cenv) - self.target.analyse_target_expression(env) + self.target.analyse_target_expression(env, self.classobj) self.dict.release_temp(env) - self.classobj.release_temp(env) - self.target.release_target_temp(env) - #env.recycle_pending_temps() + #self.classobj.release_temp(env) + #self.target.release_target_temp(env) def generate_function_definitions(self, env, code): self.generate_py_string_decls(self.scope, code) @@ -2580,7 +1279,6 @@ def analyse_expressions(self, env): self.expr.analyse_expressions(env) self.expr.release_temp(env) - #env.recycle_pending_temps() # TEMPORARY def generate_execution_code(self, code): self.expr.generate_evaluation_code(code) @@ -2599,8 +1297,13 @@ # to any of the left hand sides. 
def analyse_expressions(self, env): - self.analyse_expressions_1(env) - self.analyse_expressions_2(env) + self.analyse_types(env) + self.allocate_rhs_temps(env) + self.allocate_lhs_temps(env) + +# def analyse_expressions(self, env): +# self.analyse_expressions_1(env) +# self.analyse_expressions_2(env) def generate_execution_code(self, code): self.generate_rhs_evaluation_code(code) @@ -2618,18 +1321,33 @@ def analyse_declarations(self, env): self.lhs.analyse_target_declaration(env) - def analyse_expressions_1(self, env, use_temp = 0): + def analyse_types(self, env, use_temp = 0): self.rhs.analyse_types(env) self.lhs.analyse_target_types(env) self.rhs = self.rhs.coerce_to(self.lhs.type, env) if use_temp: self.rhs = self.rhs.coerce_to_temp(env) + + def allocate_rhs_temps(self, env): self.rhs.allocate_temps(env) + + def allocate_lhs_temps(self, env): + self.lhs.allocate_target_temps(env, self.rhs) + #self.lhs.release_target_temp(env) + #self.rhs.release_temp(env) - def analyse_expressions_2(self, env): - self.lhs.allocate_target_temps(env) - self.lhs.release_target_temp(env) - self.rhs.release_temp(env) +# def analyse_expressions_1(self, env, use_temp = 0): +# self.rhs.analyse_types(env) +# self.lhs.analyse_target_types(env) +# self.rhs = self.rhs.coerce_to(self.lhs.type, env) +# if use_temp: +# self.rhs = self.rhs.coerce_to_temp(env) +# self.rhs.allocate_temps(env) +# +# def analyse_expressions_2(self, env): +# self.lhs.allocate_target_temps(env) +# self.lhs.release_target_temp(env) +# self.rhs.release_temp(env) def generate_rhs_evaluation_code(self, code): self.rhs.generate_evaluation_code(code) @@ -2654,31 +1372,12 @@ for lhs in self.lhs_list: lhs.analyse_target_declaration(env) -# def analyse_expressions(self, env): -# import ExprNodes -# self.rhs.analyse_types(env) -# self.rhs = self.rhs.coerce_to_temp(env) -# self.rhs.allocate_temps(env) -# self.coerced_rhs_list = [] -# for lhs in self.lhs_list: -# lhs.analyse_target_types(env) -# coerced_rhs = 
ExprNodes.CloneNode(self.rhs).coerce_to(lhs.type, env) -# self.coerced_rhs_list.append(coerced_rhs) -# coerced_rhs.allocate_temps(env) -# lhs.allocate_target_temps(env) -# coerced_rhs.release_temp(env) -# lhs.release_target_temp(env) -# self.rhs.release_temp(env) - - def analyse_expressions_1(self, env, use_temp = 0): + def analyse_types(self, env, use_temp = 0): self.rhs.analyse_types(env) if use_temp: self.rhs = self.rhs.coerce_to_temp(env) else: self.rhs = self.rhs.coerce_to_simple(env) - self.rhs.allocate_temps(env) - - def analyse_expressions_2(self, env): from ExprNodes import CloneNode self.coerced_rhs_list = [] for lhs in self.lhs_list: @@ -2686,21 +1385,39 @@ rhs = CloneNode(self.rhs) rhs = rhs.coerce_to(lhs.type, env) self.coerced_rhs_list.append(rhs) + + def allocate_rhs_temps(self, env): + self.rhs.allocate_temps(env) + + def allocate_lhs_temps(self, env): + for lhs, rhs in zip(self.lhs_list, self.coerced_rhs_list): rhs.allocate_temps(env) - lhs.allocate_target_temps(env) - lhs.release_target_temp(env) - rhs.release_temp(env) + lhs.allocate_target_temps(env, rhs) + #lhs.release_target_temp(env) + #rhs.release_temp(env) self.rhs.release_temp(env) - -# def generate_execution_code(self, code): -# self.rhs.generate_evaluation_code(code) -# for i in range(len(self.lhs_list)): -# lhs = self.lhs_list[i] -# rhs = self.coerced_rhs_list[i] -# rhs.generate_evaluation_code(code) -# lhs.generate_assignment_code(rhs, code) -# # Assignment has already disposed of the cloned RHS -# self.rhs.generate_disposal_code(code) + +# def analyse_expressions_1(self, env, use_temp = 0): +# self.rhs.analyse_types(env) +# if use_temp: +# self.rhs = self.rhs.coerce_to_temp(env) +# else: +# self.rhs = self.rhs.coerce_to_simple(env) +# self.rhs.allocate_temps(env) +# +# def analyse_expressions_2(self, env): +# from ExprNodes import CloneNode +# self.coerced_rhs_list = [] +# for lhs in self.lhs_list: +# lhs.analyse_target_types(env) +# rhs = CloneNode(self.rhs) +# rhs = 
rhs.coerce_to(lhs.type, env) +# self.coerced_rhs_list.append(rhs) +# rhs.allocate_temps(env) +# lhs.allocate_target_temps(env) +# lhs.release_target_temp(env) +# rhs.release_temp(env) +# self.rhs.release_temp(env) def generate_rhs_evaluation_code(self, code): self.rhs.generate_evaluation_code(code) @@ -2734,9 +1451,16 @@ def analyse_expressions(self, env): for stat in self.stats: - stat.analyse_expressions_1(env, use_temp = 1) + stat.analyse_types(env, use_temp = 1) + stat.allocate_rhs_temps(env) for stat in self.stats: - stat.analyse_expressions_2(env) + stat.allocate_lhs_temps(env) + +# def analyse_expressions(self, env): +# for stat in self.stats: +# stat.analyse_expressions_1(env, use_temp = 1) +# for stat in self.stats: +# stat.analyse_expressions_2(env) def generate_execution_code(self, code): for stat in self.stats: @@ -2787,10 +1511,10 @@ def analyse_expressions(self, env): for arg in self.args: - arg.analyse_target_expression(env) + arg.analyse_target_expression(env, None) if not arg.type.is_pyobject: error(arg.pos, "Deletion of non-Python object") - #env.recycle_pending_temps() # TEMPORARY + #arg.release_target_temp(env) def generate_execution_code(self, code): for arg in self.args: @@ -2818,9 +1542,10 @@ if not code.break_label: error(self.pos, "break statement not inside loop") else: - code.putln( - "goto %s;" % - code.break_label) + #code.putln( + # "goto %s;" % + # code.break_label) + code.put_goto(code.break_label) class ContinueStatNode(StatNode): @@ -2834,9 +1559,10 @@ elif not code.continue_label: error(self.pos, "continue statement not inside loop") else: - code.putln( - "goto %s;" % - code.continue_label) + #code.putln( + # "goto %s;" % + # code.continue_label) + code.put_goto(code.continue_label) class ReturnStatNode(StatNode): @@ -2872,8 +1598,6 @@ if not self.return_type: # error reported earlier return - for entry in self.temps_in_use: - code.put_var_decref_clear(entry) if self.value: self.value.generate_evaluation_code(code) 
self.value.make_owned_reference(code) @@ -2890,9 +1614,12 @@ "%s = %s;" % ( Naming.retval_cname, self.return_type.default_value)) - code.putln( - "goto %s;" % - code.return_label) + for entry in self.temps_in_use: + code.put_var_decref_clear(entry) + #code.putln( + # "goto %s;" % + # code.return_label) + code.put_goto(code.return_label) class RaiseStatNode(StatNode): @@ -3056,9 +1783,10 @@ "if (%s) {" % self.condition.result_code) self.body.generate_execution_code(code) - code.putln( - "goto %s;" % - end_label) + #code.putln( + # "goto %s;" % + # end_label) + code.put_goto(end_label) code.putln("}") @@ -3087,12 +1815,12 @@ old_loop_labels = code.new_loop_labels() code.putln( "while (1) {") - code.put_label(code.continue_label) self.condition.generate_evaluation_code(code) code.putln( "if (!%s) break;" % self.condition.result_code) self.body.generate_execution_code(code) + code.put_label(code.continue_label) code.putln("}") break_label = code.break_label code.set_loop_labels(old_loop_labels) @@ -3125,12 +1853,10 @@ self.item = ExprNodes.NextNode(self.iterator, env) self.item = self.item.coerce_to(self.target.type, env) self.item.allocate_temps(env) - self.target.allocate_target_temps(env) - self.item.release_temp(env) - self.target.release_target_temp(env) - #env.recycle_pending_temps() # TEMPORARY + self.target.allocate_target_temps(env, self.item) + #self.item.release_temp(env) + #self.target.release_target_temp(env) self.body.analyse_expressions(env) - #env.recycle_pending_temps() # TEMPORARY if self.else_clause: self.else_clause.analyse_expressions(env) self.iterator.release_temp(env) @@ -3140,10 +1866,10 @@ self.iterator.generate_evaluation_code(code) code.putln( "for (;;) {") - code.put_label(code.continue_label) self.item.generate_evaluation_code(code) self.target.generate_assignment_code(self.item, code) self.body.generate_execution_code(code) + code.put_label(code.continue_label) code.putln( "}") break_label = code.break_label @@ -3169,6 +1895,7 @@ # # Used 
internally: # + # is_py_target bool # loopvar_name string # py_loopvar_node PyTempNode or None @@ -3188,14 +1915,19 @@ if not (self.bound2.is_name or self.bound2.is_literal): self.bound2 = self.bound2.coerce_to_temp(env) target_type = self.target.type - if not (target_type.is_pyobject - or target_type.assignable_from(PyrexTypes.c_int_type)): - error(self.target.pos, - "Cannot assign integer to variable of type '%s'" % target_type) + if not (target_type.is_pyobject or target_type.is_int): + error(self.target.pos, + "Integer for-loop variable must be of type int or Python object") + #if not (target_type.is_pyobject + # or target_type.assignable_from(PyrexTypes.c_int_type)): + # error(self.target.pos, + # "Cannot assign integer to variable of type '%s'" % target_type) if target_type.is_int: + self.is_py_target = 0 self.loopvar_name = self.target.entry.cname self.py_loopvar_node = None else: + self.is_py_target = 1 c_loopvar_node = ExprNodes.TempNode(self.pos, PyrexTypes.c_long_type, env) c_loopvar_node.allocate_temps(env) @@ -3204,20 +1936,18 @@ ExprNodes.CloneNode(c_loopvar_node).coerce_to_pyobject(env) self.bound1.allocate_temps(env) self.bound2.allocate_temps(env) - if self.py_loopvar_node: + if self.is_py_target: self.py_loopvar_node.allocate_temps(env) - self.target.allocate_target_temps(env) - self.target.release_target_temp(env) - if self.py_loopvar_node: - self.py_loopvar_node.release_temp(env) + self.target.allocate_target_temps(env, self.py_loopvar_node) + #self.target.release_target_temp(env) + #self.py_loopvar_node.release_temp(env) self.body.analyse_expressions(env) - if self.py_loopvar_node: + if self.is_py_target: c_loopvar_node.release_temp(env) if self.else_clause: self.else_clause.analyse_expressions(env) self.bound1.release_temp(env) self.bound2.release_temp(env) - #env.recycle_pending_temps() # TEMPORARY def generate_execution_code(self, code): old_loop_labels = code.new_loop_labels() @@ -3294,9 +2024,10 @@ 
self.else_clause.generate_execution_code(code) code.putln( "}") - code.putln( - "goto %s;" % - end_label) + #code.putln( + # "goto %s;" % + # end_label) + code.put_goto(end_label) code.put_label(our_error_label) code.put_var_xdecrefs_clear(self.cleanup_list) default_clause_seen = 0 @@ -3308,9 +2039,10 @@ error(except_clause.pos, "Default except clause not last") except_clause.generate_handling_code(code, end_label) if not default_clause_seen: - code.putln( - "goto %s;" % - code.error_label) + #code.putln( + # "goto %s;" % + # code.error_label) + code.put_goto(code.error_label) code.put_label(end_label) @@ -3342,11 +2074,11 @@ self.exc_value = ExprNodes.ExcValueNode(self.pos, env) self.exc_value.allocate_temps(env) if self.target: - self.target.analyse_target_expression(env) - self.exc_value.release_temp(env) - if self.target: - self.target.release_target_temp(env) - #env.recycle_pending_temps() # TEMPORARY + self.target.analyse_target_expression(env, self.exc_value) + else: + self.exc_value.release_temp(env) + #if self.target: + # self.target.release_target_temp(env) self.body.analyse_expressions(env) def generate_handling_code(self, code, end_label): @@ -3375,9 +2107,10 @@ else: self.exc_value.generate_disposal_code(code) self.body.generate_execution_code(code) - code.putln( - "goto %s;" - % end_label) + #code.putln( + # "goto %s;" + # % end_label) + code.put_goto(end_label) code.putln( "}") @@ -3447,20 +2180,23 @@ #code.putln( # "int %s;" % # self.lineno_var) + code.use_label(catch_label) code.putln( "__pyx_why = 0; goto %s;" % catch_label) for i in range(len(new_labels)): - if new_labels[i] and new_labels[i] <> "": - if new_labels[i] == new_error_label: - self.put_error_catcher(code, - new_error_label, i+1, catch_label) - else: - code.putln( - "%s: __pyx_why = %s; goto %s;" % ( - new_labels[i], - i+1, - catch_label)) + new_label = new_labels[i] + if new_label and new_label <> "": + if new_label in code.labels_used: + if new_label == new_error_label: + 
self.put_error_catcher(code, + new_error_label, i+1, catch_label) + else: + code.putln( + "%s: __pyx_why = %s; goto %s;" % ( + new_label, + i+1, + catch_label)) code.put_label(catch_label) code.set_all_labels(old_labels) self.finally_clause.generate_execution_code(code) @@ -3471,6 +2207,7 @@ if old_labels[i] == old_error_label: self.put_error_uncatcher(code, i+1, old_error_label) else: + code.use_label(old_labels[i]) code.putln( "case %s: goto %s;" % ( i+1, @@ -3494,9 +2231,10 @@ code.putln( "%s = %s;" % ( self.lineno_var, Naming.lineno_cname)) - code.putln( - "goto %s;" % - catch_label) + #code.putln( + # "goto %s;" % + # catch_label) + code.put_goto(catch_label) code.putln( "}") @@ -3514,9 +2252,10 @@ code.putln( "%s = 0;" % var) - code.putln( - "goto %s;" % - error_label) + #code.putln( + # "goto %s;" % + # error_label) + code.put_goto(error_label) code.putln( "}") @@ -3596,11 +2335,10 @@ for name, target in self.items: if Options.intern_names: self.interned_items.append((env.intern(name), target)) - target.analyse_target_expression(env) - target.release_temp(env) + target.analyse_target_expression(env, None) + #target.release_target_temp(env) # was release_temp ?!? 
self.module.release_temp(env) self.item.release_temp(env) - #env.recycle_pending_temps() # TEMPORARY def generate_execution_code(self, code): self.module.generate_evaluation_code(code) @@ -3637,40 +2375,21 @@ typedef struct {const char *s; const void **p;} __Pyx_CApiTabEntry; /*proto*/ typedef struct {PyObject **p; char *s;} __Pyx_InternTabEntry; /*proto*/ typedef struct {PyObject **p; char *s; long n;} __Pyx_StringTabEntry; /*proto*/ -static PyObject *__Pyx_UnpackItem(PyObject *, Py_ssize_t); /*proto*/ -static int __Pyx_EndUnpack(PyObject *, Py_ssize_t); /*proto*/ -static int __Pyx_PrintItem(PyObject *); /*proto*/ -static int __Pyx_PrintNewline(void); /*proto*/ -static void __Pyx_Raise(PyObject *type, PyObject *value, PyObject *tb); /*proto*/ -static void __Pyx_ReRaise(void); /*proto*/ -static PyObject *__Pyx_Import(PyObject *name, PyObject *from_list); /*proto*/ -static PyObject *__Pyx_GetExcValue(void); /*proto*/ -static int __Pyx_ArgTypeTest(PyObject *obj, PyTypeObject *type, int none_allowed, char *name); /*proto*/ -static int __Pyx_TypeTest(PyObject *obj, PyTypeObject *type); /*proto*/ -static int __Pyx_GetStarArgs(PyObject **args, PyObject **kwds,\ - char *kwd_list[], Py_ssize_t nargs, PyObject **args2, PyObject **kwds2); /*proto*/ -static void __Pyx_WriteUnraisable(char *name); /*proto*/ -static void __Pyx_AddTraceback(char *funcname); /*proto*/ -static PyTypeObject *__Pyx_ImportType(char *module_name, char *class_name, long size); /*proto*/ -static int __Pyx_SetVtable(PyObject *dict, void *vtable); /*proto*/ -static int __Pyx_GetVtable(PyObject *dict, void *vtabptr); /*proto*/ -static PyObject *__Pyx_CreateClass(PyObject *bases, PyObject *dict, PyObject *name, char *modname); /*proto*/ -static int __Pyx_InternStrings(__Pyx_InternTabEntry *t); /*proto*/ -static int __Pyx_InitStrings(__Pyx_StringTabEntry *t); /*proto*/ -static int __Pyx_InitCApi(PyObject *module); /*proto*/ -static int __Pyx_ImportModuleCApi(__Pyx_CApiTabEntry *t); /*proto*/ """ 
-get_name_predeclaration = \ -"static PyObject *__Pyx_GetName(PyObject *dict, char *name); /*proto*/" +#get_name_predeclaration = \ +#"static PyObject *__Pyx_GetName(PyObject *dict, char *name); /*proto*/" -get_name_interned_predeclaration = \ -"static PyObject *__Pyx_GetName(PyObject *dict, PyObject *name); /*proto*/" +#get_name_interned_predeclaration = \ +#"static PyObject *__Pyx_GetName(PyObject *dict, PyObject *name); /*proto*/" #------------------------------------------------------------------------------------ -printing_utility_code = \ -r""" +printing_utility_code = [ +""" +static int __Pyx_PrintItem(PyObject *); /*proto*/ +static int __Pyx_PrintNewline(void); /*proto*/ +""",r""" static PyObject *__Pyx_GetStdout(void) { PyObject *f = PySys_GetObject("stdout"); if (!f) { @@ -3711,14 +2430,16 @@ PyFile_SoftSpace(f, 0); return 0; } -""" +"""] #------------------------------------------------------------------------------------ # The following function is based on do_raise() from ceval.c. -raise_utility_code = \ +raise_utility_code = [ """ +static void __Pyx_Raise(PyObject *type, PyObject *value, PyObject *tb); /*proto*/ +""",""" static void __Pyx_Raise(PyObject *type, PyObject *value, PyObject *tb) { Py_XINCREF(type); Py_XINCREF(value); @@ -3745,55 +2466,28 @@ Py_INCREF(type); Py_DECREF(tmp); } - if (PyString_CheckExact(type)) { - /* Raising builtin string is deprecated but still allowed -- - * do nothing. Raising an instance of a new-style str - * subclass is right out. */ + if (PyString_Check(type)) { if (PyErr_Warn(PyExc_DeprecationWarning, - "raising a string exception is deprecated")) - goto raise_error; + "raising a string exception is deprecated")) + goto raise_error; } else if (PyType_Check(type) || PyClass_Check(type)) - ; /* PyErr_NormalizeException(&type, &value, &tb); */ - else if (PyInstance_Check(type)) { + ; /*PyErr_NormalizeException(&type, &value, &tb);*/ + else { /* Raising an instance. The value should be a dummy. 
*/ if (value != Py_None) { PyErr_SetString(PyExc_TypeError, - "instance exception may not have a separate value"); + "instance exception may not have a separate value"); goto raise_error; } - else { - /* Normalize to raise , */ - Py_DECREF(value); - value = type; + /* Normalize to raise , */ + Py_DECREF(value); + value = type; + if (PyInstance_Check(type)) type = (PyObject*) ((PyInstanceObject*)type)->in_class; - Py_INCREF(type); - } - } - else if (PyType_IsSubtype(type->ob_type, (PyTypeObject*)PyExc_Exception)) { - /* Raising a new-style object (in Py2.5). - The value should be a dummy. */ - if (value != Py_None) { - PyErr_SetString(PyExc_TypeError, - "instance exception may not have a separate value"); - goto raise_error; - } - else { - /* Normalize to raise , */ - Py_DECREF(value); - value = type; - type = (PyObject*) type->ob_type; - Py_INCREF(type); - } - } - else { - /* Not something you can raise. You get an exception - anyway, just not what you specified :-) */ - PyErr_Format(PyExc_TypeError, - "exceptions must be classes, instances, or " - "strings (deprecated), not %s", - type->ob_type->tp_name); - goto raise_error; + else + type = (PyObject*) type->ob_type; + Py_INCREF(type); } PyErr_Restore(type, value, tb); return; @@ -3803,12 +2497,14 @@ Py_XDECREF(tb); return; } -""" +"""] #------------------------------------------------------------------------------------ -reraise_utility_code = \ +reraise_utility_code = [ """ +static void __Pyx_ReRaise(void); /*proto*/ +""",""" static void __Pyx_ReRaise(void) { PyThreadState *tstate = PyThreadState_Get(); PyObject *type = tstate->exc_type; @@ -3819,12 +2515,14 @@ Py_XINCREF(tb); PyErr_Restore(type, value, tb); } -""" +"""] #------------------------------------------------------------------------------------ -arg_type_test_utility_code = \ +arg_type_test_utility_code = [ """ +static int __Pyx_ArgTypeTest(PyObject *obj, PyTypeObject *type, int none_allowed, char *name); /*proto*/ +""",""" static int 
__Pyx_ArgTypeTest(PyObject *obj, PyTypeObject *type, int none_allowed, char *name) { if (!type) { PyErr_Format(PyExc_SystemError, "Missing type object"); @@ -3837,7 +2535,7 @@ name, type->tp_name, obj->ob_type->tp_name); return 0; } -""" +"""] #------------------------------------------------------------------------------------ # @@ -3853,8 +2551,11 @@ # reference to the same dictionary is passed back in *kwds. # -get_starargs_utility_code = \ +get_starargs_utility_code = [ """ +static int __Pyx_GetStarArgs(PyObject **args, PyObject **kwds,\ + char *kwd_list[], Py_ssize_t nargs, PyObject **args2, PyObject **kwds2); /*proto*/ +""",""" static int __Pyx_GetStarArgs( PyObject **args, PyObject **kwds, @@ -3923,12 +2624,14 @@ Py_XDECREF(*kwds2); return -1; } -""" +"""] #------------------------------------------------------------------------------------ -unraisable_exception_utility_code = \ +unraisable_exception_utility_code = [ """ +static void __Pyx_WriteUnraisable(char *name); /*proto*/ +""",""" static void __Pyx_WriteUnraisable(char *name) { PyObject *old_exc, *old_val, *old_tb; PyObject *ctx; @@ -3939,12 +2642,14 @@ ctx = Py_None; PyErr_WriteUnraisable(ctx); } -""" +"""] #------------------------------------------------------------------------------------ -traceback_utility_code = \ +traceback_utility_code = [ """ +static void __Pyx_AddTraceback(char *funcname); /*proto*/ +""",""" #include "compile.h" #include "frameobject.h" #include "traceback.h" @@ -4006,12 +2711,14 @@ 'FILENAME': Naming.filename_cname, 'LINENO': Naming.lineno_cname, 'GLOBALS': Naming.module_cname -} +}] #------------------------------------------------------------------------------------ -type_import_utility_code = \ +type_import_utility_code = [ """ +static PyTypeObject *__Pyx_ImportType(char *module_name, char *class_name, long size); /*proto*/ +""",""" static PyTypeObject *__Pyx_ImportType(char *module_name, char *class_name, long size) { @@ -4061,12 +2768,14 @@ Py_XDECREF(py_name_list); 
return (PyTypeObject *)result; } -""" +"""] #------------------------------------------------------------------------------------ -set_vtable_utility_code = \ +set_vtable_utility_code = [ """ +static int __Pyx_SetVtable(PyObject *dict, void *vtable); /*proto*/ +""",""" static int __Pyx_SetVtable(PyObject *dict, void *vtable) { PyObject *pycobj = 0; int result; @@ -4085,12 +2794,14 @@ Py_XDECREF(pycobj); return result; } -""" +"""] #------------------------------------------------------------------------------------ -get_vtable_utility_code = \ -r""" +get_vtable_utility_code = [ +""" +static int __Pyx_GetVtable(PyObject *dict, void *vtabptr); /*proto*/ +""",r""" static int __Pyx_GetVtable(PyObject *dict, void *vtabptr) { int result; PyObject *pycobj; @@ -4110,12 +2821,14 @@ Py_XDECREF(pycobj); return result; } -""" +"""] #------------------------------------------------------------------------------------ -init_intern_tab_utility_code = \ +init_intern_tab_utility_code = [ """ +static int __Pyx_InternStrings(__Pyx_InternTabEntry *t); /*proto*/ +""",""" static int __Pyx_InternStrings(__Pyx_InternTabEntry *t) { while (t->p) { *t->p = PyString_InternFromString(t->s); @@ -4125,12 +2838,14 @@ } return 0; } -"""; +"""] #------------------------------------------------------------------------------------ -init_string_tab_utility_code = \ +init_string_tab_utility_code = [ """ +static int __Pyx_InitStrings(__Pyx_StringTabEntry *t); /*proto*/ +""",""" static int __Pyx_InitStrings(__Pyx_StringTabEntry *t) { while (t->p) { *t->p = PyString_FromStringAndSize(t->s, t->n - 1); @@ -4140,12 +2855,15 @@ } return 0; } -"""; +"""] #------------------------------------------------------------------------------------ -c_api_import_code = \ +c_api_import_code = [ """ +static int __Pyx_InitCApi(PyObject *module); /*proto*/ +static int __Pyx_ImportModuleCApi(__Pyx_CApiTabEntry *t); /*proto*/ +""",""" static int __Pyx_ImportModuleCApi(__Pyx_CApiTabEntry *t) { __Pyx_CApiTabEntry *api_t; while 
(t->s) { @@ -4176,4 +2894,5 @@ return result; } """ % {'API_TAB' : Naming.c_api_tab_cname} +] #------------------------------------------------------------------------------------ Modified: lxml/pyrex/Pyrex/Compiler/Parsing.py ============================================================================== --- lxml/pyrex/Pyrex/Compiler/Parsing.py (original) +++ lxml/pyrex/Pyrex/Compiler/Parsing.py Wed Feb 14 15:20:32 2007 @@ -8,6 +8,7 @@ from Scanning import PyrexScanner import Nodes import ExprNodes +from ModuleNode import ModuleNode from Errors import error, InternalError def p_ident(s, message = "Expected an identifier"): @@ -413,13 +414,7 @@ elif sy == '`': return p_backquote_expr(s) elif sy == 'INT': - digits = s.systring - if digits[:2] == "0x": - value = long(digits[2:], 16) - elif digits[:1] == "0": - value = int(digits, 8) - else: - value = int(s.systring) + value = s.systring s.next() return ExprNodes.IntNode(pos, value = value) elif sy == 'LONG': @@ -517,7 +512,7 @@ elif c == '\n': pass else: - chars.append(systr[1:]) + chars.append(r'\\' + systr[1:]) elif sy == 'NEWLINE': chars.append(r'\n') elif sy == 'END_STRING': @@ -672,7 +667,6 @@ if len(nodes) == 1: return nodes[0] else: - #return Nodes.StatListNode(nodes[0].pos, stats = nodes) return Nodes.ParallelAssignmentNode(nodes[0].pos, stats = nodes) def flatten_parallel_assignments(input, output): @@ -1379,19 +1373,19 @@ if s.sy == '?': exc_check = 1 s.next() - exc_val = p_exception_value(s) + exc_val = p_simple_expr(s) #p_exception_value(s) return exc_val, exc_check -def p_exception_value(s): - sign = "" - if s.sy == "-": - sign = "-" - s.next() - if s.sy in ('INT', 'LONG', 'FLOAT', 'NULL'): - s.systring = sign + s.systring - return p_atom(s) - else: - s.error("Exception value must be an int or float literal or NULL") +#def p_exception_value(s): +# sign = "" +# if s.sy == "-": +# sign = "-" +# s.next() +# if s.sy in ('INT', 'LONG', 'FLOAT', 'NULL'): +# s.systring = sign + s.systring +# return p_atom(s) +# 
else: +# s.error("Exception value must be an int or float literal or NULL") c_arg_list_terminators = ('*', '**', '.', ')') c_arg_list_trailers = ('.', '*', '**') @@ -1784,7 +1778,7 @@ if s.sy <> 'EOF': s.error("Syntax error in statement [%s,%s]" % ( repr(s.sy), repr(s.systring))) - return Nodes.ModuleNode(pos, doc = doc, body = body) + return ModuleNode(pos, doc = doc, body = body) #---------------------------------------------- # Modified: lxml/pyrex/Pyrex/Compiler/PyrexTypes.py ============================================================================== --- lxml/pyrex/Pyrex/Compiler/PyrexTypes.py (original) +++ lxml/pyrex/Pyrex/Compiler/PyrexTypes.py Wed Feb 14 15:20:32 2007 @@ -259,14 +259,14 @@ from_py_function = None -class CSimpleType(CType): - # - # Base class for all unstructured C types. - # - pass +#class CSimpleType(CType): +# # +# # Base class for all unstructured C types. +# # +# pass -class CVoidType(CSimpleType): +class CVoidType(CType): is_void = 1 def __repr__(self): @@ -313,9 +313,6 @@ u = "unsigned " return "" % (u, rank_to_type_name[self.rank]) - def assignable_from_resolved_type(self, src_type): - return src_type.is_numeric or src_type is error_type - def declaration_code(self, entity_code, for_display = 0, dll_linkage = None, pyrex = 0): if self.signed: @@ -324,8 +321,6 @@ u = "unsigned " base = public_decl(u + rank_to_type_name[self.rank], dll_linkage) return "%s %s" % (base, entity_code) - -# return "%s%s %s" % (u, rank_to_type_name[self.rank], entity_code) class CIntType(CNumericType): @@ -338,6 +333,9 @@ def __init__(self, rank, signed, pymemberdef_typecode = None, is_returncode = 0): CNumericType.__init__(self, rank, signed, pymemberdef_typecode) self.is_returncode = is_returncode + + def assignable_from_resolved_type(self, src_type): + return src_type.is_int or src_type.is_enum or src_type is error_type class CPySSizeTType(CIntType): @@ -379,6 +377,9 @@ def __init__(self, rank, pymemberdef_typecode = None): CNumericType.__init__(self, 
rank, 1, pymemberdef_typecode) + def assignable_from_resolved_type(self, src_type): + return src_type.is_numeric or src_type is error_type + class CArrayType(CType): # base_type CType Element type @@ -453,6 +454,8 @@ return 1 elif self.base_type.is_cfunction and other_type.is_cfunction: return self.base_type.same_as(other_type) + elif other_type.is_array: + return self.base_type.same_as(other_type.base_type) elif not other_type.is_ptr: return 0 elif self.base_type.is_void: @@ -614,14 +617,16 @@ return self.is_complete() -class CEnumType(CIntType): +class CEnumType(CType): # name string # cname string or None # typedef_flag boolean is_enum = 1 - signed = 1 - rank = 2 + #signed = 1 + #rank = 2 + to_py_function = "PyInt_FromLong" + from_py_function = "PyInt_AsLong" def __init__(self, name, cname, typedef_flag): self.name = name Modified: lxml/pyrex/Pyrex/Compiler/Symtab.py ============================================================================== --- lxml/pyrex/Pyrex/Compiler/Symtab.py (original) +++ lxml/pyrex/Pyrex/Compiler/Symtab.py Wed Feb 14 15:20:32 2007 @@ -61,6 +61,7 @@ # interned_cname string C name of interned name string # pystring_cname string C name of Python version of string literal # is_interned boolean For string const entries, value is interned + # used boolean borrowed = 0 init = "" @@ -91,6 +92,7 @@ interned_cname = None pystring_cname = None is_interned = 0 + used = 0 def __init__(self, name, cname, type, pos = None, init = None): self.name = name @@ -351,6 +353,7 @@ # Add an entry for a string constant. 
cname = self.new_const_cname() entry = Entry("", cname, c_char_array_type, init = value) + entry.used = 1 self.const_entries.append(entry) return entry @@ -395,6 +398,7 @@ self.temp_counter = n + 1 cname = "%s%d" % (Naming.pyrex_prefix, n) entry = Entry("", cname, type) + entry.used = 1 if type.is_pyobject: entry.init = "0" self.cname_to_entry[entry.cname] = entry @@ -476,6 +480,7 @@ # intern_map {string : string} Mapping from Python names to interned strs # interned_names [string] Interned names pending generation of declarations # all_pystring_entries [Entry] Python string consts from all scopes + # types_imported {PyrexType : 1} Set of types for which import code generated def __init__(self, name, parent_module, context): self.parent_module = parent_module @@ -500,6 +505,7 @@ self.intern_map = {} self.interned_names = [] self.all_pystring_entries = [] + self.types_imported = {} def qualifying_scope(self): return self.parent_module @@ -565,6 +571,8 @@ # None if previously declared as something else. 
entry = self.lookup_here(name) if entry: + if entry.is_pyglobal and entry.as_module is scope: + return entry # Already declared as the same module if not (entry.is_pyglobal and not entry.as_module): error(pos, "'%s' redeclared" % name) return None @@ -956,6 +964,8 @@ if visibility in ('public', 'readonly'): if type.pymemberdef_typecode: self.public_attr_entries.append(entry) + if name == "__weakref__": + error(pos, "Special attribute __weakref__ cannot be exposed to Python") else: error(pos, "C attribute of type '%s' cannot be accessed from Python" % type) Modified: lxml/pyrex/Pyrex/Compiler/Version.py ============================================================================== --- lxml/pyrex/Pyrex/Compiler/Version.py (original) +++ lxml/pyrex/Pyrex/Compiler/Version.py Wed Feb 14 15:20:32 2007 @@ -1 +1 @@ -version = '0.9.4.1' +version = '0.9.5.1a' Modified: lxml/pyrex/Pyrex/Mac/DarwinSystem.py ============================================================================== --- lxml/pyrex/Pyrex/Mac/DarwinSystem.py (original) +++ lxml/pyrex/Pyrex/Mac/DarwinSystem.py Wed Feb 14 15:20:32 2007 @@ -4,7 +4,8 @@ verbose = 0 gcc_pendantic = True -gcc_warnings_are_errors = False +gcc_warnings_are_errors = True +gcc_all_warnings = True import os from Pyrex.Utils import replace_suffix @@ -23,6 +24,9 @@ compiler_options.extend(["-pedantic", "-Wno-long-long"]) if gcc_warnings_are_errors: compiler_options.append("-Werror") +if gcc_all_warnings: + compiler_options.append("-Wall") + compiler_options.append("-Wno-unused-function") linkers = ["gcc", "g++"] linker_options = \ @@ -45,6 +49,7 @@ args = [compiler] + compiler_options + include_options + [c_file, "-o", o_file] if verbose_flag or verbose: print " ".join(args) + #print compiler, args ### status = os.spawnvp(os.P_WAIT, compiler, args) if status <> 0: raise CCompilerError("C compiler returned status %s" % status) Modified: lxml/pyrex/ToDo.txt ============================================================================== --- 
lxml/pyrex/ToDo.txt (original) +++ lxml/pyrex/ToDo.txt Wed Feb 14 15:20:32 2007 @@ -282,3 +282,5 @@ Make extension types use Py_TPFLAGS_HEAPTYPE so their __module__ will get set dynamically? + +Recognise return type of calls to extension type constructors. Modified: lxml/pyrex/setup.py ============================================================================== --- lxml/pyrex/setup.py (original) +++ lxml/pyrex/setup.py Wed Feb 14 15:20:32 2007 @@ -1,3 +1,7 @@ +# +# Pyrex setup file +# + from distutils.core import setup from distutils.sysconfig import get_python_lib import os @@ -5,28 +9,34 @@ compiler_dir = os.path.join(get_python_lib(prefix=''), 'Pyrex/Compiler') -if os.name == "posix": - scripts = ["bin/pyrexc"] -else: - scripts = ["pyrexc.py"] +def packages(): + result = [] + def find_packages(dir, parent_names): + for name in os.listdir(dir): + subdir = os.path.join(dir, name) + if os.path.isdir(subdir) and os.path.exists(os.path.join(subdir, "__init__.py")): + pkg_names = parent_names + [name] + result.append(".".join(pkg_names)) + find_packages(subdir, pkg_names) + source_dir = os.path.dirname(__file__) or os.curdir + find_packages(source_dir, []) + return result + +def scripts(): + if os.name == "posix": + return ["bin/pyrexc"] + else: + return ["pyrexc.py"] setup( - name = 'Pyrex', - version = version, - url = 'http://www.cosc.canterbury.ac.nz/~greg/python/Pyrex/', - author = 'Greg Ewing', - author_email = 'greg at cosc.canterbury.ac.nz', - scripts = scripts, - packages=[ - 'Pyrex', - 'Pyrex.Compiler', - 'Pyrex.Distutils', - 'Pyrex.Mac', - 'Pyrex.PC', - 'Pyrex.Plex' - ], - data_files=[ - (compiler_dir, ['Pyrex/Compiler/Lexicon.pickle']) + name = 'Pyrex', + version = version, + url = 'http://www.cosc.canterbury.ac.nz/~greg/python/Pyrex/', + author = 'Greg Ewing', + author_email = 'greg.ewing at canterbury.ac.nz', + scripts = scripts(), + packages = packages(), + data_files=[ + (compiler_dir, ['Pyrex/Compiler/Lexicon.pickle']) ] - ) - +) From scoder at 
codespeak.net Fri Feb 16 10:55:10 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 16 Feb 2007 10:55:10 +0100 (CET) Subject: [Lxml-checkins] r38998 - lxml/trunk Message-ID: <20070216095510.2DEE410090@code0.codespeak.net> Author: scoder Date: Fri Feb 16 10:55:09 2007 New Revision: 38998 Modified: lxml/trunk/Makefile Log: target 'valtest' to run tests in valgrind Modified: lxml/trunk/Makefile ============================================================================== --- lxml/trunk/Makefile (original) +++ lxml/trunk/Makefile Fri Feb 16 10:55:09 2007 @@ -20,6 +20,10 @@ PYTHONPATH=src $(PYTHON) selftest.py PYTHONPATH=src $(PYTHON) selftest2.py +valgrind_test_inplace: inplace + valgrind --tool=memcheck --leak-check=full --suppressions=valgrind-python.supp \ + $(PYTHON) test.py $(TESTFLAGS) $(TESTOPTS) + bench_inplace: inplace $(PYTHON) bench.py -i @@ -36,6 +40,8 @@ # XXX What should the default be? test: test_inplace +valtest: valgrind_test_inplace + bench: bench_inplace ftest: ftest_inplace From scoder at codespeak.net Fri Feb 16 11:01:27 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 16 Feb 2007 11:01:27 +0100 (CET) Subject: [Lxml-checkins] r38999 - lxml/trunk/src/lxml Message-ID: <20070216100127.D66DB10090@code0.codespeak.net> Author: scoder Date: Fri Feb 16 11:01:26 2007 New Revision: 38999 Modified: lxml/trunk/src/lxml/objectify.pyx Log: method docstring Modified: lxml/trunk/src/lxml/objectify.pyx ============================================================================== --- lxml/trunk/src/lxml/objectify.pyx (original) +++ lxml/trunk/src/lxml/objectify.pyx Fri Feb 16 11:01:26 2007 @@ -235,6 +235,10 @@ self.remove(child) def addattr(self, tag, value): + """Add a child value to the element. + + As opposed to append(), it sets a data value, not an element. 
+ """ _appendValue(self, _buildChildTag(self, tag), value) def __getitem__(self, key): From scoder at codespeak.net Mon Feb 19 15:28:57 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Mon, 19 Feb 2007 15:28:57 +0100 (CET) Subject: [Lxml-checkins] r39206 - lxml/trunk/doc Message-ID: <20070219142857.B490710253@code0.codespeak.net> Author: scoder Date: Mon Feb 19 15:28:56 2007 New Revision: 39206 Modified: lxml/trunk/doc/build.txt Log: fixed rst underline length Modified: lxml/trunk/doc/build.txt ============================================================================== --- lxml/trunk/doc/build.txt (original) +++ lxml/trunk/doc/build.txt Mon Feb 19 15:28:56 2007 @@ -69,7 +69,7 @@ The setuptools approach ----------------------- +----------------------- Usually, building lxml is done through setuptools. Do a Subversion checkout (or download the source tar-ball and unpack it) and then type:: From scoder at codespeak.net Mon Feb 19 15:29:40 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Mon, 19 Feb 2007 15:29:40 +0100 (CET) Subject: [Lxml-checkins] r39207 - lxml/trunk/doc Message-ID: <20070219142940.A9A8710253@code0.codespeak.net> Author: scoder Date: Mon Feb 19 15:29:39 2007 New Revision: 39207 Modified: lxml/trunk/doc/FAQ.txt Log: note on possible speed-up when compiling lxml without threading Modified: lxml/trunk/doc/FAQ.txt ============================================================================== --- lxml/trunk/doc/FAQ.txt (original) +++ lxml/trunk/doc/FAQ.txt Mon Feb 19 15:29:39 2007 @@ -73,7 +73,7 @@ of a pre-installed standard compiler and the missing package management. Both make it non-trivial to build lxml on this platform. We are trying hard to make lxml as platform-independent as possible and it is regularly tested on -Windows systems. However, we currently cannot provide Windows binaries +Windows systems. However, we currently cannot provide Windows binary distributions ourselves. 
From time to time, users of different environments kindly contribute binary @@ -122,10 +122,12 @@ simplicity of the API might suggest. The `benchmark page`_ has a comparison to other ElementTree implementations and a number of tips for performance tweaking. As with any Python application, the rule of thumb is: the more of -your processing runs in C, the faster your application gets. +your processing runs in C, the faster your application gets. See also the +section on threading_. .. _`a few caveats`: performance.html#the-elementtree-api .. _`benchmark page`: performance.html +.. _threading: #threading Why do I get errors about missing UCS4 symbols when installing lxml? @@ -236,6 +238,15 @@ multi-threading. +Would my single-threaded program run faster if I turned off threading? +---------------------------------------------------------------------- + +Can be. You can see for yourself by compiling lxml entirely without threading +support. Pass the ``--without-threading`` option to setup.py when building +lxml from source. 
+ + + Parsing and Serialisation ========================= From scoder at codespeak.net Mon Feb 19 15:30:39 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Mon, 19 Feb 2007 15:30:39 +0100 (CET) Subject: [Lxml-checkins] r39208 - lxml/trunk/src/lxml Message-ID: <20070219143039.ACFA510253@code0.codespeak.net> Author: scoder Date: Mon Feb 19 15:30:39 2007 New Revision: 39208 Modified: lxml/trunk/src/lxml/etree_defs.h Log: fix: compilation failure when compiling without threads Modified: lxml/trunk/src/lxml/etree_defs.h ============================================================================== --- lxml/trunk/src/lxml/etree_defs.h (original) +++ lxml/trunk/src/lxml/etree_defs.h Mon Feb 19 15:30:39 2007 @@ -28,11 +28,6 @@ #define PyEval_RestoreThread(state) #define PyGILState_Ensure() (PyGILState_UNLOCKED) #define PyGILState_Release(state) - - #define PyThread_allocate_lock() (NULL) - #define PyThread_free_lock(lock) - #define PyThread_acquire_lock(lock, mode) (1) - #define PyThread_release_lock(lock) #endif #ifdef WITHOUT_THREADING From scoder at codespeak.net Mon Feb 19 15:44:07 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Mon, 19 Feb 2007 15:44:07 +0100 (CET) Subject: [Lxml-checkins] r39209 - lxml/trunk/doc Message-ID: <20070219144407.597B9101B6@code0.codespeak.net> Author: scoder Date: Mon Feb 19 15:44:05 2007 New Revision: 39209 Modified: lxml/trunk/doc/FAQ.txt Log: cleanup Modified: lxml/trunk/doc/FAQ.txt ============================================================================== --- lxml/trunk/doc/FAQ.txt (original) +++ lxml/trunk/doc/FAQ.txt Mon Feb 19 15:44:05 2007 @@ -22,6 +22,7 @@ 3 Threading 3.1 Can I use threads to concurrently access the lxml API? 3.2 Does my program run faster if I use threads? + 3.3 Would my single-threaded program run faster if I turned off threading? 4 Parsing and Serialisation 4.1 Why doesn't the ``pretty_print`` option reformat my XML output? 
4.2 Why can't lxml parse my XML from unicode strings? @@ -246,7 +247,6 @@ lxml from source. - Parsing and Serialisation ========================= From scoder at codespeak.net Mon Feb 19 21:31:05 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Mon, 19 Feb 2007 21:31:05 +0100 (CET) Subject: [Lxml-checkins] r39215 - lxml/trunk/src/lxml Message-ID: <20070219203105.B801810272@code0.codespeak.net> Author: scoder Date: Mon Feb 19 21:31:03 2007 New Revision: 39215 Modified: lxml/trunk/src/lxml/extensions.pxi Log: cleanup in _prepare_function_call() Modified: lxml/trunk/src/lxml/extensions.pxi ============================================================================== --- lxml/trunk/src/lxml/extensions.pxi (original) +++ lxml/trunk/src/lxml/extensions.pxi Mon Feb 19 21:31:03 2007 @@ -147,26 +147,28 @@ cdef python.PyObject* c_dict cdef python.PyObject* dict_result if c_ns_uri is NULL: - c_dict = self._function_cache + d = self._function_cache + c_dict = d else: c_dict = python.PyDict_GetItemString( self._function_cache_ns, c_ns_uri) + if c_dict is NULL: + d = {} + python.PyDict_SetItem(self._function_cache_ns, ns_uri_utf, d) + else: + d = c_dict + name_utf = c_name if c_dict is not NULL: - d = c_dict - dict_result = python.PyDict_GetItemString(d, c_name) + dict_result = python.PyDict_GetItem(d, name_utf) if dict_result is not NULL: function = dict_result self._called_function = function return function is not None - else: - d = {} - python.PyDict_SetItem(self._function_cache_ns, ns_uri_utf, d) # first time we look up this function, so the rest is less critical if c_ns_uri is not NULL: ns_uri_utf = c_ns_uri - name_utf = c_name if self._extensions is not None: dict_result = python.PyDict_GetItem( @@ -402,7 +404,6 @@ fref = "{%s}%s" % (rctxt.functionURI, rctxt.function) else: fref = rctxt.function - print "FAILED", fref xpath.xmlXPathErr(ctxt, xpath.XML_XPATH_UNKNOWN_FUNC_ERROR) exception = XPathFunctionError("XPath function '%s' not found" % fref) 
context._exc._store_exception(exception) From scoder at codespeak.net Tue Feb 20 13:38:26 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Tue, 20 Feb 2007 13:38:26 +0100 (CET) Subject: [Lxml-checkins] r39227 - lxml/trunk Message-ID: <20070220123826.21A5410298@code0.codespeak.net> Author: scoder Date: Tue Feb 20 13:38:25 2007 New Revision: 39227 Modified: lxml/trunk/MANIFEST.in Log: include missing files Modified: lxml/trunk/MANIFEST.in ============================================================================== --- lxml/trunk/MANIFEST.in (original) +++ lxml/trunk/MANIFEST.in Tue Feb 20 13:38:25 2007 @@ -1,5 +1,6 @@ exclude *.py -include setup.py test.py selftest.py selftest2.py +include setup.py ez_setup.py setupinfo.py versioninfo.py +include test.py selftest.py selftest2.py include update-error-constants.py include MANIFEST.in version.txt include CHANGES.txt CREDITS.txt INSTALL.txt LICENSES.txt README.txt TODO.txt From scoder at codespeak.net Tue Feb 20 13:58:37 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Tue, 20 Feb 2007 13:58:37 +0100 (CET) Subject: [Lxml-checkins] r39229 - lxml/trunk Message-ID: <20070220125837.079FC1029E@code0.codespeak.net> Author: scoder Date: Tue Feb 20 13:58:35 2007 New Revision: 39229 Modified: lxml/trunk/TODO.txt Log: cleanup in TODOs Modified: lxml/trunk/TODO.txt ============================================================================== --- lxml/trunk/TODO.txt (original) +++ lxml/trunk/TODO.txt Tue Feb 20 13:58:35 2007 @@ -4,10 +4,9 @@ Exposing libxml2 functionalities -------------------------------- -* See whether XInclude support can mimic ElementTree's API. - * Test XML entities, also in an ElementTree context. + In general ---------- @@ -15,67 +14,32 @@ * will namespace nodes of unknown namespaces be added (and never freed?) 
-Top level ---------- - -* ProcessingInstruction +* more testing on multi-threading -ElementInterface ------------------ ElementTree ----------- * _setroot(), even though this is not strictly a public method. + QName ----- * expose prefix support? -Features --------- -* Relaxed NG compact notation (rnc versus rng) support. May consider - integrating this: +Objectify +--------- + +* set special __attributes__ on ObjectifiedElement's as Python attributes, not + XML children - http://www.gnosis.cx/download/relax/ -Notes on implementing iterparse -------------------------------- +Features +-------- -"iterparse" will be (or will return) an iterable object, let's call it -IterParse for clarity. A class is basically the only way of implementing -iterators in Pyrex. For the internal SAX part, IterParse will likely work a -lot like lxml.sax.ElementTreeContentHandler. - -We'd need a custom wrapper to the default libxml2 SAX handler to intercept the -parse events (this means implementing C helper functions for the SAX events) -/after/ they were processed by libxml2. See xmlSAXVersion (SAX2.c) on how to -retrieve the SAX2 default parser structure. - -IterParse should pass chunks into the parser and buffer the events it -receives. When its __next__() method is called, it returns one event or passes -new chunks until there is an event to return. This is needed as IterParse has -to convert between libxml2 push (SAX) and Python pull (iter). - -As for the input to the libxml2 parser, there are two possible ways: one is to -pass data chunks in through xmlParseChunk and the other is to use -xmlCreateIOParserCtxt and implement xmlInputReadCallback (xmlio.h) to have -libxml2 request data by itself. However, xmlParseChunk allows us to control -how far libxml2 parses in advance, so this is preferable. 
- -Python events (start, end, start-ns, end-ns) are created as follows: - -* "*-ns" events must be extracted from the libxml2 xmlSAX2StartElementNs call -(passed in arguments "prefix"/"URI" and the char* array "namespaces"). They -must be stored on a stack to build the respective "end-ns" events. - -* "start" is somewhat tricky, as it would be a bad idea to allow modifications -of the XML structure during that iterator cycle. Maybe it's enough to document -that, but there may be ways to crash lxml with certain tree operations. Note -also that care has to be taken to prevent Python from garbage collecting the -element before the "end" event. The best way to do that is to store a Python -reference to that element on a stack. +* Relaxed NG compact notation (rnc versus rng) support. Currently not + supported by libxml2 (patch exists) -* "end" is simple then: pop the element from the stack and return it. +* setting a DTD for validation (maybe a ``DTD`` class like RelaxNG?) From scoder at codespeak.net Tue Feb 20 14:02:30 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Tue, 20 Feb 2007 14:02:30 +0100 (CET) Subject: [Lxml-checkins] r39231 - in lxml/trunk: . 
src/lxml Message-ID: <20070220130230.21B681029F@code0.codespeak.net> Author: scoder Date: Tue Feb 20 14:02:27 2007 New Revision: 39231 Modified: lxml/trunk/CHANGES.txt lxml/trunk/src/lxml/etree.pyx Log: synchronise access to _elementFactory to prevent race conditions during upcalls to the Python interpreter Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Tue Feb 20 14:02:27 2007 @@ -29,6 +29,8 @@ Bugs fixed ---------- +* Element instantiation now uses locks to prevent race conditions with threads + * ElementTree.write() did not raise an exception when the file was not writable * Error handling could crash under Python <= 2.4.1 - fixed by disabling thread Modified: lxml/trunk/src/lxml/etree.pyx ============================================================================== --- lxml/trunk/src/lxml/etree.pyx (original) +++ lxml/trunk/src/lxml/etree.pyx Tue Feb 20 14:02:27 2007 @@ -969,19 +969,41 @@ evaluator = XPathElementEvaluator(self, namespaces, extensions) return evaluator.evaluate(_path, **_variables) + +cdef python.PyThread_type_lock ELEMENT_CREATION_LOCK +if config.ENABLE_THREADING: + ELEMENT_CREATION_LOCK = python.PyThread_allocate_lock() +else: + ELEMENT_CREATION_LOCK = NULL + cdef _Element _elementFactory(_Document doc, xmlNode* c_node): + cdef python.PyThreadState* state cdef _Element result result = getProxy(c_node) if result is not None: return result if c_node is NULL: return None + + if config.ENABLE_THREADING: + state = python.PyEval_SaveThread() + python.PyThread_acquire_lock(ELEMENT_CREATION_LOCK, python.WAIT_LOCK) + python.PyEval_RestoreThread(state) + result = getProxy(c_node) + if result is not None: + python.PyThread_release_lock(ELEMENT_CREATION_LOCK) + return result + element_class = LOOKUP_ELEMENT_CLASS(ELEMENT_CLASS_LOOKUP_STATE, doc, c_node) result = element_class() result._doc = doc result._c_node = c_node 
registerProxy(result) + + if config.ENABLE_THREADING: + python.PyThread_release_lock(ELEMENT_CREATION_LOCK) + result._init() return result From scoder at codespeak.net Tue Feb 20 14:02:57 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Tue, 20 Feb 2007 14:02:57 +0100 (CET) Subject: [Lxml-checkins] r39232 - lxml/trunk Message-ID: <20070220130257.5C0311029F@code0.codespeak.net> Author: scoder Date: Tue Feb 20 14:02:53 2007 New Revision: 39232 Modified: lxml/trunk/CHANGES.txt Log: release date Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Tue Feb 20 14:02:53 2007 @@ -2,7 +2,7 @@ lxml changelog ============== -1.2 (2007-02-14) +1.2 (2007-02-20) ================ Features added From scoder at codespeak.net Tue Feb 20 14:25:44 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Tue, 20 Feb 2007 14:25:44 +0100 (CET) Subject: [Lxml-checkins] r39234 - lxml/tag/lxml-1.2 Message-ID: <20070220132544.8E482100A7@code0.codespeak.net> Author: scoder Date: Tue Feb 20 14:25:43 2007 New Revision: 39234 Added: lxml/tag/lxml-1.2/ - copied from r39233, lxml/trunk/ Log: tag for lxml 1.2 From scoder at codespeak.net Tue Feb 20 14:26:55 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Tue, 20 Feb 2007 14:26:55 +0100 (CET) Subject: [Lxml-checkins] r39235 - lxml/branch/lxml-1.2 Message-ID: <20070220132655.8B8B6100A5@code0.codespeak.net> Author: scoder Date: Tue Feb 20 14:26:54 2007 New Revision: 39235 Added: lxml/branch/lxml-1.2/ - copied from r39234, lxml/tag/lxml-1.2/ Log: branch for lxml 1.2 maintenance From scoder at codespeak.net Tue Feb 20 14:36:43 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Tue, 20 Feb 2007 14:36:43 +0100 (CET) Subject: [Lxml-checkins] r39236 - lxml/trunk/doc Message-ID: <20070220133643.7E3FB1025D@code0.codespeak.net> Author: scoder Date: Tue Feb 20 14:36:42 2007 New 
Revision: 39236 Modified: lxml/trunk/doc/main.txt Log: links to 1.2 in main.txt Modified: lxml/trunk/doc/main.txt ============================================================================== --- lxml/trunk/doc/main.txt (original) +++ lxml/trunk/doc/main.txt Tue Feb 20 14:36:42 2007 @@ -29,6 +29,8 @@ .. _`installation instructions`: installation.html +* `lxml 1.2`_, released 2007-02-20 (`changes for 1.2`_) + * `lxml 1.1.2`_, released 2006-10-30 (`changes for 1.1.2`_) * `lxml 1.1.1`_, released 2006-09-21 (`changes for 1.1.1`_) @@ -61,6 +63,7 @@ * `lxml 0.5`_, released 2005-04-08 +.. _`lxml 1.2`: lxml-1.2.tgz .. _`lxml 1.1.2`: lxml-1.1.2.tgz .. _`lxml 1.1.1`: lxml-1.1.1.tgz .. _`lxml 1.1`: lxml-1.1.tgz @@ -78,6 +81,7 @@ .. _`lxml 0.5.1`: lxml-0.5.1.tgz .. _`lxml 0.5`: lxml-0.5.tgz +.. _`CHANGES for 1.2`: changes-1.2.html .. _`CHANGES for 1.1.2`: changes-1.1.2.html .. _`CHANGES for 1.1.1`: changes-1.1.1.html .. _`CHANGES for 1.1`: changes-1.1.html From scoder at codespeak.net Tue Feb 20 14:38:27 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Tue, 20 Feb 2007 14:38:27 +0100 (CET) Subject: [Lxml-checkins] r39237 - lxml/branch/lxml-1.2/doc Message-ID: <20070220133827.4FD9D1025D@code0.codespeak.net> Author: scoder Date: Tue Feb 20 14:38:25 2007 New Revision: 39237 Modified: lxml/branch/lxml-1.2/doc/main.txt Log: merged in version notes from trunk Modified: lxml/branch/lxml-1.2/doc/main.txt ============================================================================== --- lxml/branch/lxml-1.2/doc/main.txt (original) +++ lxml/branch/lxml-1.2/doc/main.txt Tue Feb 20 14:38:25 2007 @@ -29,6 +29,8 @@ .. _`installation instructions`: installation.html +* `lxml 1.2`_, released 2007-02-20 (`changes for 1.2`_) + * `lxml 1.1.2`_, released 2006-10-30 (`changes for 1.1.2`_) * `lxml 1.1.1`_, released 2006-09-21 (`changes for 1.1.1`_) @@ -61,6 +63,7 @@ * `lxml 0.5`_, released 2005-04-08 +.. _`lxml 1.2`: lxml-1.2.tgz .. _`lxml 1.1.2`: lxml-1.1.2.tgz .. 
_`lxml 1.1.1`: lxml-1.1.1.tgz .. _`lxml 1.1`: lxml-1.1.tgz @@ -78,6 +81,7 @@ .. _`lxml 0.5.1`: lxml-0.5.1.tgz .. _`lxml 0.5`: lxml-0.5.tgz +.. _`CHANGES for 1.2`: changes-1.2.html .. _`CHANGES for 1.1.2`: changes-1.1.2.html .. _`CHANGES for 1.1.1`: changes-1.1.1.html .. _`CHANGES for 1.1`: changes-1.1.html From scoder at codespeak.net Wed Feb 21 15:42:28 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Wed, 21 Feb 2007 15:42:28 +0100 (CET) Subject: [Lxml-checkins] r39286 - lxml/pyrex/Pyrex/Compiler Message-ID: <20070221144228.D3AA9101C6@code0.codespeak.net> Author: scoder Date: Wed Feb 21 15:42:26 2007 New Revision: 39286 Modified: lxml/pyrex/Pyrex/Compiler/ExprNodes.py Log: reverted None-Node-Test patch Modified: lxml/pyrex/Pyrex/Compiler/ExprNodes.py ============================================================================== --- lxml/pyrex/Pyrex/Compiler/ExprNodes.py (original) +++ lxml/pyrex/Pyrex/Compiler/ExprNodes.py Wed Feb 21 15:42:26 2007 @@ -2883,8 +2883,6 @@ return self.arg.result_code def generate_result_code(self, code): - if isinstance(self.arg, NoneNode): - return if self.type.typeobj_is_available(): code.putln( "if (!__Pyx_TypeTest(%s, %s)) %s" % ( From scoder at codespeak.net Wed Feb 21 15:56:27 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Wed, 21 Feb 2007 15:56:27 +0100 (CET) Subject: [Lxml-checkins] r39287 - lxml/trunk/src/lxml Message-ID: <20070221145627.2A876101CB@code0.codespeak.net> Author: scoder Date: Wed Feb 21 15:56:25 2007 New Revision: 39287 Modified: lxml/trunk/src/lxml/objectify.pyx Log: be less restrictive wrt attribute names in ObjectPath Modified: lxml/trunk/src/lxml/objectify.pyx ============================================================================== --- lxml/trunk/src/lxml/objectify.pyx (original) +++ lxml/trunk/src/lxml/objectify.pyx Wed Feb 21 15:56:25 2007 @@ -1130,7 +1130,7 @@ cdef object __MATCH_PATH_SEGMENT __MATCH_PATH_SEGMENT = re.compile( - 
r"(\.?)\s*(?:\{([^}]*)\})?\s*(\w+)\s*(?:\[\s*([-0-9]+)\s*\])?", + r"(\.?)\s*(?:\{([^}]*)\})?\s*([^.{}\[\]\s]+)\s*(?:\[\s*([-0-9]+)\s*\])?", re.U).match cdef _parseObjectPathString(path): From scoder at codespeak.net Wed Feb 21 15:56:49 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Wed, 21 Feb 2007 15:56:49 +0100 (CET) Subject: [Lxml-checkins] r39288 - lxml/trunk Message-ID: <20070221145649.6B35A101C8@code0.codespeak.net> Author: scoder Date: Wed Feb 21 15:56:48 2007 New Revision: 39288 Modified: lxml/trunk/CHANGES.txt Log: be less restrictive wrt attribute names in ObjectPath Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Wed Feb 21 15:56:48 2007 @@ -2,6 +2,19 @@ lxml changelog ============== +Under Development +================= + +Features added +-------------- + +* The pattern for attribute names in ObjectPath was too restrictive + + +Bugs fixed +---------- + + 1.2 (2007-02-20) ================ From scoder at codespeak.net Wed Feb 21 16:01:55 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Wed, 21 Feb 2007 16:01:55 +0100 (CET) Subject: [Lxml-checkins] r39289 - in lxml/trunk: . 
src/lxml Message-ID: <20070221150155.36A66101B9@code0.codespeak.net> Author: scoder Date: Wed Feb 21 16:01:53 2007 New Revision: 39289 Modified: lxml/trunk/CHANGES.txt lxml/trunk/src/lxml/objectify.pyx Log: renamed ObjectifiedDataElement.__setText() to _setText() to make it easier to access Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Wed Feb 21 16:01:53 2007 @@ -8,12 +8,15 @@ Features added -------------- -* The pattern for attribute names in ObjectPath was too restrictive - Bugs fixed ---------- +* renamed ObjectifiedDataElement.__setText() to _setText() to make it easier + to access + +* The pattern for attribute names in ObjectPath was too restrictive + 1.2 (2007-02-20) ================ Modified: lxml/trunk/src/lxml/objectify.pyx ============================================================================== --- lxml/trunk/src/lxml/objectify.pyx (original) +++ lxml/trunk/src/lxml/objectify.pyx Wed Feb 21 16:01:53 2007 @@ -493,7 +493,7 @@ def __repr__(self): return textOf(self._c_node) or '' - def __setText(self, s): + def _setText(self, s): """For use in subclasses only. Don't use unless you know what you are doing. 
""" From scoder at codespeak.net Wed Feb 21 16:04:04 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Wed, 21 Feb 2007 16:04:04 +0100 (CET) Subject: [Lxml-checkins] r39290 - lxml/trunk Message-ID: <20070221150404.E54B9101B9@code0.codespeak.net> Author: scoder Date: Wed Feb 21 16:04:02 2007 New Revision: 39290 Modified: lxml/trunk/TODO.txt Log: note: XSLT and extension functions should be refactored Modified: lxml/trunk/TODO.txt ============================================================================== --- lxml/trunk/TODO.txt (original) +++ lxml/trunk/TODO.txt Wed Feb 21 16:04:02 2007 @@ -16,6 +16,8 @@ * more testing on multi-threading +* the code on extension functions and XSLT needs some refactoring + ElementTree ----------- From scoder at codespeak.net Wed Feb 21 16:05:38 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Wed, 21 Feb 2007 16:05:38 +0100 (CET) Subject: [Lxml-checkins] r39291 - lxml/trunk/doc Message-ID: <20070221150538.14BA1101B9@code0.codespeak.net> Author: scoder Date: Wed Feb 21 16:05:34 2007 New Revision: 39291 Modified: lxml/trunk/doc/main.txt Log: first take on a major restructuring in doc/main.txt Modified: lxml/trunk/doc/main.txt ============================================================================== --- lxml/trunk/doc/main.txt (original) +++ lxml/trunk/doc/main.txt Wed Feb 21 16:05:34 2007 @@ -25,104 +25,68 @@ .. _`lxml at the Python cheeseshop`: http://cheeseshop.python.org/pypi/lxml/ .. _`this key`: pubkey.asc +The latest version is `lxml 1.2`_, released 2007-02-20 (`changes for 1.2`_). +`Older versions`_ are listed below. + +.. _`lxml 1.2`: lxml-1.2.tgz +.. _`CHANGES for 1.2`: changes-1.2.html +.. _`Older versions`: #old-versions + Please take a look at the `installation instructions`_! .. 
_`installation instructions`: installation.html -* `lxml 1.2`_, released 2007-02-20 (`changes for 1.2`_) +It's also possible to check out the latest development version of lxml +from svn directly, using a command like this:: -* `lxml 1.1.2`_, released 2006-10-30 (`changes for 1.1.2`_) + svn co http://codespeak.net/svn/lxml/trunk lxml -* `lxml 1.1.1`_, released 2006-09-21 (`changes for 1.1.1`_) +You can also `browse it through the web`_. Please read `how to build lxml +from source`_ first. The `latest CHANGES`_ of the developer version are also +accessible. You can check there if a bug you found has been fixed or a +feature you want has been implemented in the latest trunk version. -* `lxml 1.1`_, released 2006-09-13 (`changes for 1.1`_) +.. _`how to build lxml from source`: build.html +.. _`browse it through the web`: http://codespeak.net/svn/lxml +.. _`latest CHANGES`: http://codespeak.net/svn/lxml/trunk/CHANGES.txt -* `lxml 1.0.4`_, released 2006-09-09 (`changes for 1.0.4`_) -* `lxml 1.0.3`_, released 2006-08-08 (`changes for 1.0.3`_) +Documentation +------------- -* `lxml 1.0.2`_, released 2006-06-27 (`changes for 1.0.2`_) +* ElementTree: -* `lxml 1.0.1`_, released 2006-06-09 (`changes for 1.0.1`_) + * ElementTree_ API -* `lxml 1.0`_, released 2006-06-01 (`changes for 1.0`_) + * compatibility_ and differences of lxml.etree -* `lxml 0.9.2`_, released 2006-05-10 (`changes for 0.9.2`_) + * `benchmark results`_ -* `lxml 0.9.1`_, released 2006-03-30 (`changes for 0.9.1`_) +* lxml.etree: -* `lxml 0.9`_, released 2006-03-20 (`changes for 0.9`_) + * `lxml.etree specific API`_ documentation -* `lxml 0.8`_, released 2005-11-03 (`changes for 0.8`_) + * Python `extension functions`_ for XPath and XSLT -* `lxml 0.7`_, released 2005-06-15 (`changes for 0.7`_) + * `custom element classes`_ for custom XML APIs -* `lxml 0.6`_, released 2005-05-14 (`changes for 0.6`_) + * a `SAX compliant API`_ -* `lxml 0.5.1`_, released 2005-04-09 (`changes for 0.5.1`_) + * the `C-level API`_ 
for interfacing with external C/Pyrex modules -* `lxml 0.5`_, released 2005-04-08 - -.. _`lxml 1.2`: lxml-1.2.tgz -.. _`lxml 1.1.2`: lxml-1.1.2.tgz -.. _`lxml 1.1.1`: lxml-1.1.1.tgz -.. _`lxml 1.1`: lxml-1.1.tgz -.. _`lxml 1.0.4`: lxml-1.0.4.tgz -.. _`lxml 1.0.3`: lxml-1.0.3.tgz -.. _`lxml 1.0.2`: lxml-1.0.2.tgz -.. _`lxml 1.0.1`: lxml-1.0.1.tgz -.. _`lxml 1.0`: lxml-1.0.tgz -.. _`lxml 0.9.2`: lxml-0.9.2.tgz -.. _`lxml 0.9.1`: lxml-0.9.1.tgz -.. _`lxml 0.9`: lxml-0.9.tgz -.. _`lxml 0.8`: lxml-0.8.tgz -.. _`lxml 0.7`: lxml-0.7.tgz -.. _`lxml 0.6`: lxml-0.6.tgz -.. _`lxml 0.5.1`: lxml-0.5.1.tgz -.. _`lxml 0.5`: lxml-0.5.tgz +* lxml.objectify: -.. _`CHANGES for 1.2`: changes-1.2.html -.. _`CHANGES for 1.1.2`: changes-1.1.2.html -.. _`CHANGES for 1.1.1`: changes-1.1.1.html -.. _`CHANGES for 1.1`: changes-1.1.html -.. _`CHANGES for 1.0.4`: changes-1.0.4.html -.. _`CHANGES for 1.0.3`: changes-1.0.3.html -.. _`CHANGES for 1.0.2`: changes-1.0.2.html -.. _`CHANGES for 1.0.1`: changes-1.0.1.html -.. _`CHANGES for 1.0`: changes-1.0.html -.. _`CHANGES for 0.9.2`: changes-0.9.2.html -.. _`CHANGES for 0.9.1`: changes-0.9.1.html -.. _`CHANGES for 0.9`: changes-0.9.html -.. _`CHANGES for 0.8`: changes-0.8.html -.. _`CHANGES for 0.7`: changes-0.7.html -.. _`CHANGES for 0.6`: changes-0.6.html -.. _`CHANGES for 0.5.1`: changes-0.5.1.html + * `lxml.objectify`_ API documentation -It's also possible to check out the latest development version of lxml -from svn directly, using a command like this:: - - svn co http://codespeak.net/svn/lxml/trunk lxml - -You can also `browse it through the web`_. Please read `how to build lxml -from source`_ first. The `latest CHANGES`_ of the developer version are also -accessible. You can check there if a bug you found has been fixed or a -feature you want has been implemented in the latest trunk version. - -.. _`how to build lxml from source`: build.html -.. _`browse it through the web`: http://codespeak.net/svn/lxml -.. 
_`latest CHANGES`: http://codespeak.net/svn/lxml/trunk/CHANGES.txt - - -Documentation -------------- + * a brief comparison of `objectify and etree`_ lxml.etree follows the ElementTree_ API as much as possible, building it on -top of the native libxml2 tree. See also the `ElementTree compatibility -overview`_ and the `benchmark results`_ comparing lxml to the original +top of the native libxml2 tree. See also the ElementTree compatibility_ +overview and the `benchmark results`_ comparing lxml to the original ElementTree_ and cElementTree_ implementations. Right after the ElementTree_ documentation, the most important place to look -is the `lxml.etree API documentation`_. It describes how lxml extends the +is the `lxml.etree specific API`_ documentation. It describes how lxml extends the ElementTree API to expose libxml2 and libxslt specific functionality, such as XPath_, `Relax NG`_, `XML Schema`_, `XSLT`_, and `c14n`_. Python code can be called from XPath expressions and XSLT stylesheets through the use of @@ -143,8 +107,8 @@ .. _cElementTree: http://effbot.org/zone/celementtree.htm .. _`benchmark results`: performance.html -.. _`ElementTree compatibility overview`: compatibility.html -.. _`lxml.etree API documentation`: api.html +.. _`compatibility`: compatibility.html +.. _`lxml.etree specific API`: api.html .. _`extension functions`: extensions.html .. _`custom element classes`: element_classes.html .. _`SAX compliant API`: sax.html @@ -173,3 +137,72 @@ The lxml library is shipped under a BSD license. libxml2 and libxslt2 itself are shipped under the MIT license. There should therefore be no obstacle to using lxml in your codebase. 
+ + +Old Versions +------------ + +* `lxml 1.1.2`_, released 2006-10-30 (`changes for 1.1.2`_) + +* `lxml 1.1.1`_, released 2006-09-21 (`changes for 1.1.1`_) + +* `lxml 1.1`_, released 2006-09-13 (`changes for 1.1`_) + +* `lxml 1.0.4`_, released 2006-09-09 (`changes for 1.0.4`_) + +* `lxml 1.0.3`_, released 2006-08-08 (`changes for 1.0.3`_) + +* `lxml 1.0.2`_, released 2006-06-27 (`changes for 1.0.2`_) + +* `lxml 1.0.1`_, released 2006-06-09 (`changes for 1.0.1`_) + +* `lxml 1.0`_, released 2006-06-01 (`changes for 1.0`_) + +* `lxml 0.9.2`_, released 2006-05-10 (`changes for 0.9.2`_) + +* `lxml 0.9.1`_, released 2006-03-30 (`changes for 0.9.1`_) + +* `lxml 0.9`_, released 2006-03-20 (`changes for 0.9`_) + +* `lxml 0.8`_, released 2005-11-03 (`changes for 0.8`_) + +* `lxml 0.7`_, released 2005-06-15 (`changes for 0.7`_) + +* `lxml 0.6`_, released 2005-05-14 (`changes for 0.6`_) + +* `lxml 0.5.1`_, released 2005-04-09 (`changes for 0.5.1`_) + +* `lxml 0.5`_, released 2005-04-08 + +.. _`lxml 1.1.2`: lxml-1.1.2.tgz +.. _`lxml 1.1.1`: lxml-1.1.1.tgz +.. _`lxml 1.1`: lxml-1.1.tgz +.. _`lxml 1.0.4`: lxml-1.0.4.tgz +.. _`lxml 1.0.3`: lxml-1.0.3.tgz +.. _`lxml 1.0.2`: lxml-1.0.2.tgz +.. _`lxml 1.0.1`: lxml-1.0.1.tgz +.. _`lxml 1.0`: lxml-1.0.tgz +.. _`lxml 0.9.2`: lxml-0.9.2.tgz +.. _`lxml 0.9.1`: lxml-0.9.1.tgz +.. _`lxml 0.9`: lxml-0.9.tgz +.. _`lxml 0.8`: lxml-0.8.tgz +.. _`lxml 0.7`: lxml-0.7.tgz +.. _`lxml 0.6`: lxml-0.6.tgz +.. _`lxml 0.5.1`: lxml-0.5.1.tgz +.. _`lxml 0.5`: lxml-0.5.tgz + +.. _`changes for 1.1.2`: changes-1.1.2.html +.. _`changes for 1.1.1`: changes-1.1.1.html +.. _`changes for 1.1`: changes-1.1.html +.. _`changes for 1.0.4`: changes-1.0.4.html +.. _`changes for 1.0.3`: changes-1.0.3.html +.. _`changes for 1.0.2`: changes-1.0.2.html +.. _`changes for 1.0.1`: changes-1.0.1.html +.. _`changes for 1.0`: changes-1.0.html +.. _`changes for 0.9.2`: changes-0.9.2.html +.. _`changes for 0.9.1`: changes-0.9.1.html +.. _`changes for 0.9`: changes-0.9.html +.. 
_`changes for 0.8`: changes-0.8.html +.. _`changes for 0.7`: changes-0.7.html +.. _`changes for 0.6`: changes-0.6.html +.. _`changes for 0.5.1`: changes-0.5.1.html From scoder at codespeak.net Wed Feb 21 16:43:47 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Wed, 21 Feb 2007 16:43:47 +0100 (CET) Subject: [Lxml-checkins] r39292 - in lxml/trunk: . doc Message-ID: <20070221154347.EA04410195@code0.codespeak.net> Author: scoder Date: Wed Feb 21 16:43:46 2007 New Revision: 39292 Added: lxml/trunk/doc/parsing.txt - copied, changed from r39233, lxml/trunk/doc/api.txt lxml/trunk/doc/validation.txt - copied, changed from r39233, lxml/trunk/doc/api.txt lxml/trunk/doc/xpathxslt.txt - copied, changed from r39233, lxml/trunk/doc/api.txt Modified: lxml/trunk/CHANGES.txt lxml/trunk/doc/api.txt lxml/trunk/doc/main.txt lxml/trunk/doc/mkhtml.py Log: first take on a major split-up of api.txt Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Wed Feb 21 16:43:46 2007 @@ -17,6 +17,11 @@ * The pattern for attribute names in ObjectPath was too restrictive +Other changes +------------- + +* major restructuring in the documentation + 1.2 (2007-02-20) ================ Modified: lxml/trunk/doc/api.txt ============================================================================== --- lxml/trunk/doc/api.txt (original) +++ lxml/trunk/doc/api.txt Wed Feb 21 16:43:46 2007 @@ -4,23 +4,35 @@ lxml tries to follow established APIs wherever possible. Sometimes, however, the need to expose a feature in an easy way led to the invention of a new API. +This page describes the major differences and a few additions to the main +ElementTree API. + +Separate pages describe the support for `parsing XML`_, executing `XPath and +XSLT`_, `validating XML`_ and interfacing with other XML tools through the +`SAX-API`_. 
+ +lxml is extremely extensible through `XPath functions in Python`_, custom +`Python element classes`_, custom `URL resolvers`_ and even `at the C-level`_. + +.. _`parsing XML`: parsing.html +.. _`XPath and XSLT`: xpathxslt.html +.. _`validating XML`: validation.html +.. _`SAX-API`: sax.html +.. _`XPath functions in Python`: extensions.html +.. _`Python element classes`: element_classes.html +.. _`at the C-level`: capi.html +.. _`URL resolvers`: resolvers.html + .. contents:: .. - 1 lxml.etree - 2 Other Element APIs - 3 Trees and Documents - 4 Iteration - 5 Parsers - 6 iterparse and iterwalk - 7 Error handling on exceptions - 8 Python unicode strings - 9 XPath - 10 XSLT - 11 RelaxNG - 12 XMLSchema - 13 xinclude - 14 write_c14n on ElementTree + 1 lxml.etree + 2 Other Element APIs + 3 Trees and Documents + 4 Iteration + 5 Error handling on exceptions + 6 xinclude + 7 write_c14n on ElementTree lxml.etree @@ -167,208 +179,9 @@ ['d'] See also the section on the utility functions ``iterparse()`` and -``iterwalk()`` below. - - -Parsers -------- +``iterwalk()`` in the `parser documentation`_. -One of the differences is the parser. There is support for both XML and -(broken) HTML. Both are based on libxml2 and therefore only support options -that are backed by the library. Parsers take a number of keyword arguments. -The following is an example for namespace cleanup during parsing, first with -the default parser, then with a parametrized one:: - - >>> xml = '' - - >>> et = etree.parse(StringIO(xml)) - >>> print etree.tostring(et.getroot()) - - - >>> parser = etree.XMLParser(ns_clean=True) - >>> et = etree.parse(StringIO(xml), parser) - >>> print etree.tostring(et.getroot()) - - -HTML parsing is similarly simple. The parsers have a ``recover`` keyword -argument that the HTMLParser sets by default. It lets libxml2 try its best to -return something usable without raising an exception. 
You should use libxml2 -version 2.6.21 or newer to take advantage of this feature:: - - >>> broken_html = "<html><head><title>test<body><h1>page title</h3>" - - >>> parser = etree.HTMLParser() - >>> et = etree.parse(StringIO(broken_html), parser) - - >>> print etree.tostring(et.getroot()) - <html><head><title>test</title></head><body><h1>page title</h1></body></html>
- -Lxml has an HTML function, similar to the XML shortcut known from -ElementTree:: - - >>> html = etree.HTML(broken_html) - >>> print etree.tostring(html) - <html><head><title>test</title></head><body><h1>page title</h1></body></html>
- -The support for parsing broken HTML depends entirely on libxml2's recovery -algorithm. It is *not* the fault of lxml if you find documents that are so -heavily broken that the parser cannot handle them. There is also no guarantee -that the resulting tree will contain all data from the original document. The -parser may have to drop seriously broken parts when struggling to keep -parsing. Especially misplaced meta tags can suffer from this, which may lead -to encoding problems. - -The use of the libxml2 parsers makes some additional information available at -the API level. Currently, ElementTree objects can access the DOCTYPE -information provided by a parsed document, as well as the XML version and the -original encoding:: - - >>> pub_id = "-//W3C//DTD XHTML 1.0 Transitional//EN" - >>> sys_url = "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd" - >>> doctype_string = '' % (pub_id, sys_url) - >>> xml_header = '' - >>> xhtml = xml_header + doctype_string + '' - - >>> tree = etree.parse(StringIO(xhtml)) - >>> docinfo = tree.docinfo - >>> print docinfo.public_id - -//W3C//DTD XHTML 1.0 Transitional//EN - >>> print docinfo.system_url - http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd - >>> docinfo.doctype == doctype_string - True - - >>> print docinfo.xml_version - 1.0 - >>> print docinfo.encoding - ascii - - -iterparse and iterwalk ----------------------- - -As known from ElementTree, the ``iterparse()`` utility function returns an -iterator that generates parser events for an XML file (or file-like object), -while building the tree. The values are tuples ``(event-type, object)``. The -event types are 'start', 'end', 'start-ns' and 'end-ns'. - -The 'start' and 'end' events represent opening and closing elements and are -accompanied by the respective element. By default, only 'end' events are -generated:: - - >>> xml = '''\ - ... - ... text - ... texttail - ... - ... - ... 
''' - - >>> context = etree.iterparse(StringIO(xml)) - >>> for action, elem in context: - ... print action, elem.tag - end element - end element - end {testns}empty-element - end root - -The resulting tree is available through the ``root`` property of the iterator:: - - >>> context.root.tag - 'root' - -The other types can be activated with the ``events`` keyword argument:: - - >>> events = ("start", "end") - >>> context = etree.iterparse(StringIO(xml), events=events) - >>> for action, elem in context: - ... print action, elem.tag - start root - start element - end element - start element - end element - start {testns}empty-element - end {testns}empty-element - end root - -You can modify the element and its descendants when handling the 'end' event. -To save memory, for example, you can remove subtrees that are no longer -needed:: - - >>> context = etree.iterparse(StringIO(xml)) - >>> for action, elem in context: - ... print len(elem), - ... elem.clear() - 0 0 0 3 - >>> context.root.getchildren() - [] - -**WARNING**: During the 'start' event, the descendants and following siblings -are not yet available and should not be accessed. During the 'end' event, the -element and its descendants can be freely modified, but its following siblings -should not be accessed. During either of the two events, you **must not** -modify or move the ancestors (parents) of the current element. You should -also avoid moving or discarding the element itself. The golden rule is: do -not touch anything that will have to be touched again by the parser later on. - -If you have elements with a long list of children in your XML file and want to -save more memory during parsing, you can clean up the preceding siblings of -the current element:: - - >>> for event, element in etree.iterparse(StringIO(xml)): - ... # ... do something with the element - ... element.clear() # clean up children - ... if element.getprevious(): # clean up preceding siblings - ... 
del element.getparent()[0] - -You can use ``while`` instead of ``if`` if you skipped siblings using the -``tag`` keyword argument. The more selective your tag is, however, the more -thought you will have to put into finding the right way to clean up the -elements that were skipped. Therefore, it is sometimes easier to traverse all -elements and do the tag selection by hand in the event handler code. - -The 'start-ns' and 'end-ns' events notify about namespace declarations and -generate tuples ``(prefix, URI)``:: - - >>> events = ("start-ns", "end-ns") - >>> context = etree.iterparse(StringIO(xml), events=events) - >>> for action, obj in context: - ... print action, obj - start-ns ('', 'testns') - end-ns None - -It is common practice to use a list as namespace stack and pop the last entry -on the 'end-ns' event. - -lxml.etree supports two extensions compared to ElementTree. It accepts a -``tag`` keyword argument just like ``element.getiterator(tag)``. This -restricts events to a specific tag or namespace. - - >>> context = etree.iterparse(StringIO(xml), tag="element") - >>> for action, elem in context: - ... print action, elem.tag - end element - end element - - >>> events = ("start", "end") - >>> context = etree.iterparse(StringIO(xml), events=events, tag="{testns}*") - >>> for action, elem in context: - ... print action, elem.tag - start {testns}empty-element - end {testns}empty-element - -The second extension is the ``iterwalk()`` function. It behaves exactly like -``iterparse()``, but works on Elements and ElementTrees:: - - >>> root = context.root - >>> context = etree.iterwalk(root, events=events, tag="element") - >>> for action, elem in context: - ... print action, elem.tag - start element - end element - start element - end element +.. _`parser documentation`: parsing.html#iterparse-and-iterwalk Error handling on exceptions @@ -415,467 +228,6 @@ etc. which are described in their respective sections below. 
-Python unicode strings ----------------------- - -lxml.etree has broader support for Python unicode strings than the ElementTree -library. First of all, where ElementTree would raise an exception, the -parsers in lxml.etree can handle unicode strings straight away. This is most -helpful for XML snippets embedded in source code using the ``XML()`` -function:: - - >>> uxml = u' \uf8d1 + \uf8d2 ' - >>> uxml - u' \uf8d1 + \uf8d2 ' - >>> root = etree.XML(uxml) - -This requires, however, that unicode strings do not specify a conflicting -encoding themselves and thus lie about their real encoding:: - - >>> etree.XML(u'\n' + uxml) - Traceback (most recent call last): - ... - ValueError: Unicode strings with encoding declaration are not supported. - -Similarly, you will get errors when you try the same with HTML data in a -unicode string that specifies a charset in a meta tag of the header. You -should generally avoid converting XML/HTML data to unicode before passing it -into the parsers. It is both slower and error prone. - -To serialize the result, you would normally use the ``tostring`` module -function, which serializes to plain ASCII by default or a number of other -encodings if asked for:: - - >>> etree.tostring(root) - '  +  ' - - >>> etree.tostring(root, 'UTF-8', xml_declaration=False) - ' \xef\xa3\x91 + \xef\xa3\x92 ' - -As an extension, lxml.etree has a new ``tounicode()`` function that you can -call on XML tree objects to retrieve a Python unicode representation:: - - >>> etree.tounicode(root) - u' \uf8d1 + \uf8d2 ' - - >>> el = etree.Element("test") - >>> etree.tounicode(el) - u'' - - >>> subel = etree.SubElement(el, "subtest") - >>> etree.tounicode(el) - u'' - - >>> et = etree.ElementTree(el) - >>> etree.tounicode(et) - u'' - -The result of ``tounicode()`` can be treated like any other Python unicode -string and then passed back into the parsers. 
However, if you want to save -the result to a file or pass it over the network, you should use ``write()`` -or ``tostring()`` with an encoding argument (typically UTF-8) to serialize the -XML. The main reason is that unicode strings returned by ``tounicode()`` -never have an XML declaration and therefore do not specify their encoding. -These strings are most likely not parsable by other XML libraries. - -In contrast, the ``tostring()`` function automatically adds a declaration as -needed that reflects the encoding of the returned string. This makes it -possible for other parsers to correctly parse the XML byte stream. Note that -using ``tostring()`` with UTF-8 is also considerably faster in most cases. - - -XPath ------ - -lxml.etree supports the simple path syntax of the ``findall()`` etc. methods -on ElementTree and Element, as known from the original ElementTree library. -As an extension, these classes also provide an ``xpath()`` method that -supports expressions in the complete XPath syntax. - -There are also specialized XPath evaluator classes that are more efficient for -frequent evaluation: ``XPath`` and ``XPathEvaluator``. See the `performance -comparison`_ to learn when to use which. Their semantics when used on -Elements and ElementTrees are the same as for the ``xpath()`` method described -here. - -.. 
_`performance comparison`: performance.html#xpath - -For ElementTree, the xpath method performs a global XPath query against the -document (if absolute) or against the root node (if relative):: - - >>> f = StringIO('') - >>> tree = etree.parse(f) - - >>> r = tree.xpath('/foo/bar') - >>> len(r) - 1 - >>> r[0].tag - 'bar' - - >>> r = tree.xpath('bar') - >>> r[0].tag - 'bar' - -When ``xpath()`` is used on an element, the XPath expression is evaluated -against the element (if relative) or against the root tree (if absolute):: - - >>> root = tree.getroot() - >>> r = root.xpath('bar') - >>> r[0].tag - 'bar' - - >>> bar = root[0] - >>> r = bar.xpath('/foo/bar') - >>> r[0].tag - 'bar' - - >>> tree = bar.getroottree() - >>> r = tree.xpath('/foo/bar') - >>> r[0].tag - 'bar' - -Optionally, you can provide a ``namespaces`` keyword argument, which should be -a dictionary mapping the namespace prefixes used in the XPath expression to -namespace URIs:: - - >>> f = StringIO('''\ - ... - ... Text - ... - ... ''') - >>> doc = etree.parse(f) - >>> r = doc.xpath('/t:foo/b:bar', {'t': 'http://codespeak.net/ns/test1', - ... 'b': 'http://codespeak.net/ns/test2'}) - >>> len(r) - 1 - >>> r[0].tag - '{http://codespeak.net/ns/test2}bar' - >>> r[0].text - 'Text' - -There is also an optional ``extensions`` argument which is used to define -`extension functions`_ in Python that are local to this evaluation. - -.. _`extension functions`: extensions.html - -The return values of XPath evaluations vary, depending on the XPath expression -used: - -* True or False, when the XPath expression has a boolean result - -* a float, when the XPath expression has a numeric result (integer or float) - -* a (unicode) string, when the XPath expression has a string result. - -* a list of items, when the XPath expression has a list as result. The items - may include elements, strings and tuples. Text nodes and attributes in the - result are returned as strings (the text node content or attribute value). 
- Comments are also returned as strings, enclosed by the usual ```` markers. Namespace declarations are returned as tuples of strings: - ``(prefix, URI)``. - -A related convenience method of ElementTree objects is ``getpath(element)``, -which returns a structural, absolute XPath expression to find that element:: - - >>> a = etree.Element("a") - >>> b = etree.SubElement(a, "b") - >>> c = etree.SubElement(a, "c") - >>> d1 = etree.SubElement(c, "d") - >>> d2 = etree.SubElement(c, "d") - - >>> tree = etree.ElementTree(c) - >>> print tree.getpath(d2) - /c/d[2] - >>> tree.xpath(tree.getpath(d2)) == [d2] - True - - -XSLT ----- - -lxml.etree introduces a new class, lxml.etree.XSLT. The class can be -given an ElementTree object to construct an XSLT transformer:: - - >>> f = StringIO('''\ - ... - ... - ... - ... - ... ''') - >>> xslt_doc = etree.parse(f) - >>> transform = etree.XSLT(xslt_doc) - -You can then run the transformation on an ElementTree document by simply -calling it, and this results in another ElementTree object:: - - >>> f = StringIO('Text') - >>> doc = etree.parse(f) - >>> result = transform(doc) - -The result object can be accessed like a normal ElementTree document:: - - >>> result.getroot().text - 'Text' - -but, as opposed to normal ElementTree objects, can also be turned into an (XML -or text) string by applying the str() function:: - - >>> str(result) - '\nText\n' - -The result is always a plain string, encoded as requested by the -``xsl:output`` element in the stylesheet. If you want a Python unicode string -instead, you should set this encoding to ``UTF-8`` (unless the `ASCII` default -is sufficient). This allows you to call the builtin ``unicode()`` function on -the result:: - - >>> unicode(result) - u'\nText\n' - -You can use other encodings at the cost of multiple recoding. Encodings that -are not supported by Python will result in an error:: - - >>> xslt_tree = etree.XML('''\ - ... - ... - ... - ... - ... - ... 
''') - >>> transform = etree.XSLT(xslt_tree) - - >>> result = transform(doc) - >>> unicode(result) - Traceback (most recent call last): - [...] - LookupError: unknown encoding: UCS4 - -It is possible to pass parameters, in the form of XPath expressions, to the -XSLT template:: - - >>> xslt_tree = etree.XML('''\ - ... - ... - ... - ... - ... ''') - >>> transform = etree.XSLT(xslt_tree) - >>> f = StringIO('Text') - >>> doc = etree.parse(f) - -The parameters are passed as keyword parameters to the transform call. First -let's try passing in a simple string expression:: - - >>> result = transform(doc, a="'A'") - >>> str(result) - '\nA\n' - -Let's try a non-string XPath expression now:: - - >>> result = transform(doc, a="/a/b/text()") - >>> str(result) - '\nText\n' - -There's also a convenience method on the tree object for doing XSL -transformations. This is less efficient if you want to apply the same XSL -transformation to multiple documents, but is shorter to write for one-shot -operations, as you do not have to instantiate a stylesheet yourself:: - - >>> result = doc.xslt(xslt_tree, a="'A'") - >>> str(result) - '\nA\n' - -By default, XSLT supports all extension functions from libxslt and libexslt as -well as Python regular expressions through EXSLT. Note that some extensions -enable style sheets to read and write files on the local file system. See the -`document loader documentation`_ on how to deal with this. - -.. _`document loader documentation`: resolvers.html - -If you want to know how your stylesheet performed, pass the ``profile_run`` -keyword to the transform:: - - >>> result = transform(doc, a="/a/b/text()", profile_run=True) - >>> profile = result.xslt_profile - -The value of the ``xslt_profile`` property is an ElementTree with profiling -data about each template, similar to the following:: - - -